From 1dd104db59d145d516a5e9cbb081ed01262961ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder@redhat.com>
Date: Tue, 12 Mar 2024 07:47:48 +0100
Subject: [clang][Interp] Implement _Complex Not unary operators

This only happens in C as far as I can tell. The complex varialbe
will have undergone a conversion to bool in C++ before reaching
the unary operator.
---
 clang/lib/AST/Interp/ByteCodeExprGen.cpp | 11 +++++++++++
 clang/test/AST/Interp/complex.c          |  5 +++++
 2 files changed, 16 insertions(+)
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 0dd645990d1d..da4a8f88f139 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -3182,6 +3182,17 @@ bool ByteCodeExprGen<Emitter>::VisitComplexUnaryOperator(
   case UO_AddrOf:
     return this->delegate(SubExpr);
 
+  case UO_LNot:
+    if (!this->visit(SubExpr))
+      return false;
+    if (!this->emitComplexBoolCast(SubExpr))
+      return false;
+    if (!this->emitInvBool(E))
+      return false;
+    if (PrimType ET = classifyPrim(E->getType()); ET != PT_Bool)
+      return this->emitCast(PT_Bool, ET, E);
+    return true;
+
   case UO_Real:
     return this->emitComplexReal(SubExpr);
 
diff --git a/clang/test/AST/Interp/complex.c b/clang/test/AST/Interp/complex.c
index c9c2efb59745..b5f30b87baa7 100644
--- a/clang/test/AST/Interp/complex.c
+++ b/clang/test/AST/Interp/complex.c
@@ -14,3 +14,8 @@ void blah() {
 _Static_assert((0.0 + 0.0j) == (0.0 + 0.0j), "");
 _Static_assert((0.0 + 0.0j) != (0.0 + 0.0j), ""); // both-error {{static assertion}} \
                                                   // both-note {{evaluates to}}
+
+const _Complex float FC = {0.0f, 0.0f};
+_Static_assert(!FC, "");
+const _Complex float FI = {0, 0};
+_Static_assert(!FI, "");
-- 
cgit v1.2.3


From 103469b5f7467d5df15799c2d8ad150729bc33bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder@redhat.com>
Date: Tue, 12 Mar 2024 08:50:51 +0100
Subject: [clang][Interp] Implement more easy _Complex unary operators

---
 clang/lib/AST/Interp/ByteCodeExprGen.cpp | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index da4a8f88f139..86304a54473c 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -3138,16 +3138,17 @@ bool ByteCodeExprGen<Emitter>::VisitComplexUnaryOperator(
     return this->discard(SubExpr);
 
   std::optional<PrimType> ResT = classify(E);
+  auto prepareResult = [=]() -> bool {
+    if (!ResT && !Initializing) {
+      std::optional<unsigned> LocalIndex =
+          allocateLocal(SubExpr, /*IsExtended=*/false);
+      if (!LocalIndex)
+        return false;
+      return this->emitGetPtrLocal(*LocalIndex, E);
+    }
 
-  // Prepare storage for result.
-  if (!ResT && !Initializing) {
-    std::optional<unsigned> LocalIndex =
-        allocateLocal(SubExpr, /*IsExtended=*/false);
-    if (!LocalIndex)
-      return false;
-    if (!this->emitGetPtrLocal(*LocalIndex, E))
-      return false;
-  }
+    return true;
+  };
 
   // The offset of the temporary, if we created one.
   unsigned SubExprOffset = ~0u;
@@ -3167,6 +3168,8 @@ bool ByteCodeExprGen<Emitter>::VisitComplexUnaryOperator(
 
   switch (E->getOpcode()) {
   case UO_Minus:
+    if (!prepareResult())
+      return false;
     if (!createTemp())
       return false;
     for (unsigned I = 0; I != 2; ++I) {
@@ -3179,7 +3182,9 @@ bool ByteCodeExprGen<Emitter>::VisitComplexUnaryOperator(
     }
     break;
 
-  case UO_AddrOf:
+  case UO_Plus:   // +x
+  case UO_AddrOf: // &x
+  case UO_Deref:  // *x
     return this->delegate(SubExpr);
 
   case UO_LNot:
-- 
cgit v1.2.3


From 85f6669de59b2bb75c6848afa79de63be988721c Mon Sep 17 00:00:00 2001
From: jeanPerier <jperier@nvidia.com>
Date: Tue, 12 Mar 2024 09:04:25 +0100
Subject: [flang] implement sizeof lowering for polymorphic entities (#84498)

For non polymorphic entities, semantics knows the type size and rewrite
sizeof to `"cst element size" * size(x)`.

Lowering has to deal with the polymorphic case where the type size must
be retrieved from the descriptor (note that the lowering implementation
would work with any entity, polymorphic on not, it is just not used for
the non polymorphic cases).
---
 .../flang/Optimizer/Builder/IntrinsicCall.h        |  1 +
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp      | 18 +++++++++++++++++
 flang/test/Lower/Intrinsics/sizeof.f90             | 23 ++++++++++++++++++++++
 3 files changed, 42 insertions(+)
 create mode 100644 flang/test/Lower/Intrinsics/sizeof.f90

diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 7cb99d61a686..ca15b4bc34b2 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -338,6 +338,7 @@ struct IntrinsicLibrary {
   mlir::Value genSign(mlir::Type, llvm::ArrayRef<mlir::Value>);
   mlir::Value genSind(mlir::Type, llvm::ArrayRef<mlir::Value>);
   fir::ExtendedValue genSize(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
+  fir::ExtendedValue genSizeOf(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   mlir::Value genSpacing(mlir::Type resultType,
                          llvm::ArrayRef<mlir::Value> args);
   fir::ExtendedValue genSpread(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 2f7ace658e47..ca5ab6fcea34 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -567,6 +567,10 @@ static constexpr IntrinsicHandler handlers[]{
        {"dim", asAddr, handleDynamicOptional},
        {"kind", asValue}}},
      /*isElemental=*/false},
+    {"sizeof",
+     &I::genSizeOf,
+     {{{"a", asBox}}},
+     /*isElemental=*/false},
     {"sleep", &I::genSleep, {{{"seconds", asValue}}}, /*isElemental=*/false},
     {"spacing", &I::genSpacing},
     {"spread",
@@ -5946,6 +5950,20 @@ IntrinsicLibrary::genSize(mlir::Type resultType,
       .getResults()[0];
 }
 
+// SIZEOF
+fir::ExtendedValue
+IntrinsicLibrary::genSizeOf(mlir::Type resultType,
+                            llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 1);
+  mlir::Value box = fir::getBase(args[0]);
+  mlir::Value eleSize = builder.create<fir::BoxEleSizeOp>(loc, resultType, box);
+  if (!fir::isArray(args[0]))
+    return eleSize;
+  mlir::Value arraySize = builder.createConvert(
+      loc, resultType, fir::runtime::genSize(builder, loc, box));
+  return builder.create<mlir::arith::MulIOp>(loc, eleSize, arraySize);
+}
+
 // TAND
 mlir::Value IntrinsicLibrary::genTand(mlir::Type resultType,
                                       llvm::ArrayRef<mlir::Value> args) {
diff --git a/flang/test/Lower/Intrinsics/sizeof.f90 b/flang/test/Lower/Intrinsics/sizeof.f90
new file mode 100644
index 000000000000..959ca1692b51
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/sizeof.f90
@@ -0,0 +1,23 @@
+! Test SIZEOF lowering for polymorphic entities.
+! RUN: bbc -emit-hlfir --polymorphic-type -o - %s | FileCheck %s
+
+integer(8) function test1(x)
+  class(*) :: x
+  test1 = sizeof(x)
+end function
+! CHECK-LABEL:   func.func @_QPtest1(
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest1Ex"} : (!fir.class<none>) -> (!fir.class<none>, !fir.class<none>)
+! CHECK:           %[[VAL_4:.*]] = fir.box_elesize %[[VAL_3]]#1 : (!fir.class<none>) -> i64
+! CHECK:           hlfir.assign %[[VAL_4]] to %{{.*}} : i64, !fir.ref<i64>
+
+integer(8) function test2(x)
+  class(*) :: x(:, :)
+  test2 = sizeof(x)
+end function
+! CHECK-LABEL:   func.func @_QPtest2(
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest2Ex"} : (!fir.class<!fir.array<?x?xnone>>) -> (!fir.class<!fir.array<?x?xnone>>, !fir.class<!fir.array<?x?xnone>>)
+! CHECK:           %[[VAL_4:.*]] = fir.box_elesize %[[VAL_3]]#1 : (!fir.class<!fir.array<?x?xnone>>) -> i64
+! CHECK:           %[[VAL_7:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.class<!fir.array<?x?xnone>>) -> !fir.box<none>
+! CHECK:           %[[VAL_9:.*]] = fir.call @_FortranASize(%[[VAL_7]], %{{.*}}, %{{.*}}) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> i64
+! CHECK:           %[[VAL_10:.*]] = arith.muli %[[VAL_4]], %[[VAL_9]] : i64
+! CHECK:           hlfir.assign %[[VAL_10]] to %{{.*}} : i64, !fir.ref<i64>
-- 
cgit v1.2.3


From 8e0f4b943fee13afc970ca8277a8e76b9da63b96 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Tue, 12 Mar 2024 09:12:44 +0100
Subject: [NVPTX] Add support for atomic add for f16 type (#84295)

atom.add.noftz.f16 is supported since SM 7.0
---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp |   3 +
 llvm/lib/Target/NVPTX/NVPTXIntrinsics.td    |  15 ++++
 llvm/test/CodeGen/NVPTX/atomics-sm70.ll     | 121 ++++++++++++++++++++++++++++
 llvm/test/CodeGen/NVPTX/atomics.ll          |   7 ++
 4 files changed, 146 insertions(+)
 create mode 100644 llvm/test/CodeGen/NVPTX/atomics-sm70.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index c979c03dc1b8..c411c8ef9528 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -6100,6 +6100,9 @@ NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
 
   if (AI->isFloatingPointOperation()) {
     if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) {
+      if (Ty->isHalfTy() && STI.getSmVersion() >= 70 &&
+          STI.getPTXVersion() >= 63)
+        return AtomicExpansionKind::None;
       if (Ty->isFloatTy())
         return AtomicExpansionKind::None;
       if (Ty->isDoubleTy() && STI.hasAtomAddF64())
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 477789a164ea..869b13369e87 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1630,6 +1630,13 @@ defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
   ".add", atomic_load_add_64_gen, i64imm, imm>;
 
+defm INT_PTX_ATOM_ADD_G_F16 : F_ATOMIC_2<f16, Int16Regs, ".global", ".f16", ".add.noftz",
+  atomic_load_add_g, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
+defm INT_PTX_ATOM_ADD_S_F16 : F_ATOMIC_2<f16, Int16Regs, ".shared", ".f16", ".add.noftz",
+  atomic_load_add_s, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
+defm INT_PTX_ATOM_ADD_GEN_F16 : F_ATOMIC_2<f16, Int16Regs, "", ".f16", ".add.noftz",
+  atomic_load_add_gen, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
+
 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
   atomic_load_add_g, f32imm, fpimm>;
 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
@@ -2007,6 +2014,9 @@ multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
                        SDNode Imm, ValueType ImmTy,
                        list<Predicate> Preds> {
   let AddedComplexity = 1 in {
+    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
+                      (ins Int16Regs:$src, regclass:$b),
+                      (Intr (i16 Int16Regs:$src), (regT regclass:$b))>;
     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                       (ins Int32Regs:$src, regclass:$b),
                       (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
@@ -2017,6 +2027,9 @@ multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
   // tablegen can't infer argument types from Intrinsic (though it can
   // from Instruction) so we have to enforce specific type on
   // immediates via explicit cast to ImmTy.
+  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
+                    (ins Int16Regs:$src, ImmType:$b),
+                    (Intr (i16 Int16Regs:$src), (ImmTy Imm:$b))>;
   def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                     (ins Int32Regs:$src, ImmType:$b),
                     (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
@@ -2136,6 +2149,8 @@ multiclass ATOM2_add_impl<string OpStr> {
    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
+   defm _f16  : ATOM2S_impl<OpStr, "f", "f16", f16, Int16Regs, f16imm, fpimm, f16,
+                            [hasSM<70>, hasPTX<63>]>;
    defm _f32  : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
                             []>;
    defm _f64  : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
new file mode 100644
index 000000000000..f68bb2049d00
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK64
+; RUN: llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx62 | FileCheck %s --check-prefixes=CHECKPTX62
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx62 | %ptxas-verify -arch=sm_70 %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %val) {
+; CHECK-LABEL: test(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<5>;
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [test_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_param_3];
+; CHECK-NEXT:    atom.add.noftz.f16 %rs2, [%r1], %rs1;
+; CHECK-NEXT:    ld.param.u32 %r2, [test_param_1];
+; CHECK-NEXT:    atom.global.add.noftz.f16 %rs3, [%r2], %rs1;
+; CHECK-NEXT:    ld.param.u32 %r3, [test_param_2];
+; CHECK-NEXT:    atom.shared.add.noftz.f16 %rs4, [%r3], %rs1;
+; CHECK-NEXT:    ret;
+;
+; CHECK64-LABEL: test(
+; CHECK64:       {
+; CHECK64-NEXT:    .reg .b16 %rs<5>;
+; CHECK64-NEXT:    .reg .b64 %rd<4>;
+; CHECK64-EMPTY:
+; CHECK64-NEXT:  // %bb.0:
+; CHECK64-NEXT:    ld.param.u64 %rd1, [test_param_0];
+; CHECK64-NEXT:    ld.param.b16 %rs1, [test_param_3];
+; CHECK64-NEXT:    atom.add.noftz.f16 %rs2, [%rd1], %rs1;
+; CHECK64-NEXT:    ld.param.u64 %rd2, [test_param_1];
+; CHECK64-NEXT:    atom.global.add.noftz.f16 %rs3, [%rd2], %rs1;
+; CHECK64-NEXT:    ld.param.u64 %rd3, [test_param_2];
+; CHECK64-NEXT:    atom.shared.add.noftz.f16 %rs4, [%rd3], %rs1;
+; CHECK64-NEXT:    ret;
+;
+; CHECKPTX62-LABEL: test(
+; CHECKPTX62:       {
+; CHECKPTX62-NEXT:    .reg .pred %p<4>;
+; CHECKPTX62-NEXT:    .reg .b16 %rs<14>;
+; CHECKPTX62-NEXT:    .reg .b32 %r<49>;
+; CHECKPTX62-EMPTY:
+; CHECKPTX62-NEXT:  // %bb.0:
+; CHECKPTX62-NEXT:    ld.param.b16 %rs1, [test_param_3];
+; CHECKPTX62-NEXT:    ld.param.u32 %r20, [test_param_2];
+; CHECKPTX62-NEXT:    ld.param.u32 %r19, [test_param_1];
+; CHECKPTX62-NEXT:    ld.param.u32 %r21, [test_param_0];
+; CHECKPTX62-NEXT:    and.b32 %r1, %r21, -4;
+; CHECKPTX62-NEXT:    and.b32 %r22, %r21, 3;
+; CHECKPTX62-NEXT:    shl.b32 %r2, %r22, 3;
+; CHECKPTX62-NEXT:    mov.b32 %r23, 65535;
+; CHECKPTX62-NEXT:    shl.b32 %r24, %r23, %r2;
+; CHECKPTX62-NEXT:    not.b32 %r3, %r24;
+; CHECKPTX62-NEXT:    ld.u32 %r46, [%r1];
+; CHECKPTX62-NEXT:  $L__BB0_1: // %atomicrmw.start
+; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECKPTX62-NEXT:    shr.u32 %r25, %r46, %r2;
+; CHECKPTX62-NEXT:    cvt.u16.u32 %rs2, %r25;
+; CHECKPTX62-NEXT:    add.rn.f16 %rs4, %rs2, %rs1;
+; CHECKPTX62-NEXT:    cvt.u32.u16 %r26, %rs4;
+; CHECKPTX62-NEXT:    shl.b32 %r27, %r26, %r2;
+; CHECKPTX62-NEXT:    and.b32 %r28, %r46, %r3;
+; CHECKPTX62-NEXT:    or.b32 %r29, %r28, %r27;
+; CHECKPTX62-NEXT:    atom.cas.b32 %r6, [%r1], %r46, %r29;
+; CHECKPTX62-NEXT:    setp.ne.s32 %p1, %r6, %r46;
+; CHECKPTX62-NEXT:    mov.u32 %r46, %r6;
+; CHECKPTX62-NEXT:    @%p1 bra $L__BB0_1;
+; CHECKPTX62-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECKPTX62-NEXT:    and.b32 %r7, %r19, -4;
+; CHECKPTX62-NEXT:    shl.b32 %r30, %r19, 3;
+; CHECKPTX62-NEXT:    and.b32 %r8, %r30, 24;
+; CHECKPTX62-NEXT:    shl.b32 %r32, %r23, %r8;
+; CHECKPTX62-NEXT:    not.b32 %r9, %r32;
+; CHECKPTX62-NEXT:    ld.global.u32 %r47, [%r7];
+; CHECKPTX62-NEXT:  $L__BB0_3: // %atomicrmw.start9
+; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECKPTX62-NEXT:    shr.u32 %r33, %r47, %r8;
+; CHECKPTX62-NEXT:    cvt.u16.u32 %rs6, %r33;
+; CHECKPTX62-NEXT:    add.rn.f16 %rs8, %rs6, %rs1;
+; CHECKPTX62-NEXT:    cvt.u32.u16 %r34, %rs8;
+; CHECKPTX62-NEXT:    shl.b32 %r35, %r34, %r8;
+; CHECKPTX62-NEXT:    and.b32 %r36, %r47, %r9;
+; CHECKPTX62-NEXT:    or.b32 %r37, %r36, %r35;
+; CHECKPTX62-NEXT:    atom.global.cas.b32 %r12, [%r7], %r47, %r37;
+; CHECKPTX62-NEXT:    setp.ne.s32 %p2, %r12, %r47;
+; CHECKPTX62-NEXT:    mov.u32 %r47, %r12;
+; CHECKPTX62-NEXT:    @%p2 bra $L__BB0_3;
+; CHECKPTX62-NEXT:  // %bb.4: // %atomicrmw.end8
+; CHECKPTX62-NEXT:    and.b32 %r13, %r20, -4;
+; CHECKPTX62-NEXT:    shl.b32 %r38, %r20, 3;
+; CHECKPTX62-NEXT:    and.b32 %r14, %r38, 24;
+; CHECKPTX62-NEXT:    shl.b32 %r40, %r23, %r14;
+; CHECKPTX62-NEXT:    not.b32 %r15, %r40;
+; CHECKPTX62-NEXT:    ld.shared.u32 %r48, [%r13];
+; CHECKPTX62-NEXT:  $L__BB0_5: // %atomicrmw.start27
+; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECKPTX62-NEXT:    shr.u32 %r41, %r48, %r14;
+; CHECKPTX62-NEXT:    cvt.u16.u32 %rs10, %r41;
+; CHECKPTX62-NEXT:    add.rn.f16 %rs12, %rs10, %rs1;
+; CHECKPTX62-NEXT:    cvt.u32.u16 %r42, %rs12;
+; CHECKPTX62-NEXT:    shl.b32 %r43, %r42, %r14;
+; CHECKPTX62-NEXT:    and.b32 %r44, %r48, %r15;
+; CHECKPTX62-NEXT:    or.b32 %r45, %r44, %r43;
+; CHECKPTX62-NEXT:    atom.shared.cas.b32 %r18, [%r13], %r48, %r45;
+; CHECKPTX62-NEXT:    setp.ne.s32 %p3, %r18, %r48;
+; CHECKPTX62-NEXT:    mov.u32 %r48, %r18;
+; CHECKPTX62-NEXT:    @%p3 bra $L__BB0_5;
+; CHECKPTX62-NEXT:  // %bb.6: // %atomicrmw.end26
+; CHECKPTX62-NEXT:    ret;
+  %r1 = atomicrmw fadd ptr %dp0, half %val seq_cst
+  %r2 = atomicrmw fadd ptr addrspace(1) %dp1, half %val seq_cst
+  %ret = atomicrmw fadd ptr addrspace(3) %dp3, half %val seq_cst
+  ret void
+}
+
+attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll
index e99d0fd05e34..6f2b5dcf47f1 100644
--- a/llvm/test/CodeGen/NVPTX/atomics.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics.ll
@@ -175,6 +175,13 @@ define float @atomicrmw_add_f32_generic(ptr %addr, float %val) {
   ret float %ret
 }
 
+; CHECK-LABEL: atomicrmw_add_f16_generic
+define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
+; CHECK: atom.cas
+  %ret = atomicrmw fadd ptr %addr, half %val seq_cst
+  ret half %ret
+}
+
 ; CHECK-LABEL: atomicrmw_add_f32_addrspace1
 define float @atomicrmw_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
 ; CHECK: atom.global.add.f32
-- 
cgit v1.2.3


From 36dece001325bbf00129c48ddb3c83668b0ac36e Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Tue, 12 Mar 2024 08:20:08 +0000
Subject: [AMDGPU] Add missing GFX10 buffer format d16 hi instructions (#84809)

---
 llvm/lib/Target/AMDGPU/BUFInstructions.td        | 5 ++---
 llvm/test/MC/AMDGPU/gfx10_asm_mubuf.s            | 6 ++++++
 llvm/test/MC/Disassembler/AMDGPU/gfx10_mubuf.txt | 6 ++++++
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index a1bbe170ee29..c7091028b3b5 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -2691,9 +2691,8 @@ defm BUFFER_LOAD_SBYTE_D16        : MUBUF_Real_AllAddr_gfx10<0x022>;
 defm BUFFER_LOAD_SBYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x023>;
 defm BUFFER_LOAD_SHORT_D16        : MUBUF_Real_AllAddr_gfx10<0x024>;
 defm BUFFER_LOAD_SHORT_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x025>;
-// FIXME-GFX10: Add following instructions:
-//defm BUFFER_LOAD_FORMAT_D16_HI_X  : MUBUF_Real_AllAddr_gfx10<0x026>;
-//defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x027>;
+defm BUFFER_LOAD_FORMAT_D16_HI_X  : MUBUF_Real_AllAddr_gfx10<0x026>;
+defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x027>;
 defm BUFFER_LOAD_FORMAT_D16_X     : MUBUF_Real_AllAddr_gfx10<0x080>;
 defm BUFFER_LOAD_FORMAT_D16_XY    : MUBUF_Real_AllAddr_gfx10<0x081>;
 defm BUFFER_LOAD_FORMAT_D16_XYZ   : MUBUF_Real_AllAddr_gfx10<0x082>;
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_mubuf.s b/llvm/test/MC/AMDGPU/gfx10_asm_mubuf.s
index aacdfcb4e871..b77f8e0a3192 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_mubuf.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_mubuf.s
@@ -17,6 +17,9 @@ buffer_load_format_d16_xyz v[1:2], off, s[4:7], s1
 buffer_load_format_d16_xyzw v[1:2], off, s[4:7], s1
 // GFX10: encoding: [0x00,0x00,0x0c,0xe2,0x00,0x01,0x01,0x01]
 
+buffer_load_format_d16_hi_x v1, off, s[4:7], s1
+// GFX10: encoding: [0x00,0x00,0x98,0xe0,0x00,0x01,0x01,0x01]
+
 buffer_load_format_x v5, off, s[8:11], s3 offset:4095
 // GFX10: encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x02,0x03]
 
@@ -245,6 +248,9 @@ buffer_store_format_d16_xyz v[1:2], off, s[4:7], s1
 buffer_store_format_d16_xyzw v[1:2], off, s[4:7], s1
 // GFX10: encoding: [0x00,0x00,0x1c,0xe2,0x00,0x01,0x01,0x01]
 
+buffer_store_format_d16_hi_x v1, off, s[4:7], s1
+// GFX10: encoding: [0x00,0x00,0x9c,0xe0,0x00,0x01,0x01,0x01]
+
 buffer_store_format_x v1, off, s[12:15], s4 offset:4095
 // GFX10: encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x03,0x04]
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_mubuf.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mubuf.txt
index b0731be4484c..849c89e37011 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_mubuf.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mubuf.txt
@@ -1328,6 +1328,9 @@
 # GFX10: buffer_load_format_d16_xyzw v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe2,0x00,0x01,0x01,0x01]
 0x00,0x00,0x0c,0xe2,0x00,0x01,0x01,0x01
 
+# GFX10: buffer_load_format_d16_hi_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x98,0xe0,0x00,0x01,0x01,0x01]
+0x00,0x00,0x98,0xe0,0x00,0x01,0x01,0x01
+
 # GFX10: buffer_load_format_x v255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0xff,0x02,0x03]
 0xff,0x0f,0x00,0xe0,0x00,0xff,0x02,0x03
 
@@ -2039,6 +2042,9 @@
 # GFX10: buffer_store_format_d16_xyzw v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe2,0x00,0x01,0x01,0x01]
 0x00,0x00,0x1c,0xe2,0x00,0x01,0x01,0x01
 
+# GFX10: buffer_store_format_d16_hi_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x9c,0xe0,0x00,0x01,0x01,0x01]
+0x00,0x00,0x9c,0xe0,0x00,0x01,0x01,0x01
+
 # GFX10: buffer_store_format_x v1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x03,0xc1]
 0xff,0x0f,0x10,0xe0,0x00,0x01,0x03,0xc1
 
-- 
cgit v1.2.3


From 6bbe8a296ee91754d423c59c35727eaa624f7140 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154@yahoo.com>
Date: Tue, 12 Mar 2024 01:24:21 -0700
Subject: [llvm-exegesis] Add thread IDs to subprocess memory names (#84451)

This patch adds the thread ID to the subprocess memory shared memory
names. This avoids conflicts for downstream consumers that might want to
consume llvm-exegesis across multiple threads, which would otherwise run
into conflicts due to the same PID running multiple instances.
---
 llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp   |  9 +++----
 llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp  | 28 +++++++++++++++-------
 llvm/tools/llvm-exegesis/lib/SubprocessMemory.h    |  5 +++-
 .../llvm-exegesis/X86/SubprocessMemoryTest.cpp     |  5 +++-
 4 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index 5c9848f3c688..4e97d188d172 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -301,6 +301,7 @@ private:
     if (AddMemDefError)
       return AddMemDefError;
 
+    long ParentTID = SubprocessMemory::getCurrentTID();
     pid_t ParentOrChildPID = fork();
 
     if (ParentOrChildPID == -1) {
@@ -314,7 +315,7 @@ private:
       // Unregister handlers, signal handling is now handled through ptrace in
       // the host process.
       sys::unregisterHandlers();
-      prepareAndRunBenchmark(PipeFiles[0], Key);
+      prepareAndRunBenchmark(PipeFiles[0], Key, ParentTID);
       // The child process terminates in the above function, so we should never
       // get to this point.
       llvm_unreachable("Child process didn't exit when expected.");
@@ -415,8 +416,8 @@ private:
     setrlimit(RLIMIT_CORE, &rlim);
   }
 
-  [[noreturn]] void prepareAndRunBenchmark(int Pipe,
-                                           const BenchmarkKey &Key) const {
+  [[noreturn]] void prepareAndRunBenchmark(int Pipe, const BenchmarkKey &Key,
+                                           long ParentTID) const {
     // Disable core dumps in the child process as otherwise everytime we
     // encounter an execution failure like a segmentation fault, we will create
     // a core dump. We report the information directly rather than require the
@@ -473,7 +474,7 @@ private:
 
     Expected<int> AuxMemFDOrError =
         SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
-            Key.MemoryValues, ParentPID, CounterFileDescriptor);
+            Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor);
     if (!AuxMemFDOrError)
       exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
 
diff --git a/llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp b/llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp
index a49fa077257d..11ad72a914c4 100644
--- a/llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp
@@ -9,11 +9,13 @@
 #include "SubprocessMemory.h"
 #include "Error.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
 #include <cerrno>
 
 #ifdef __linux__
 #include <fcntl.h>
 #include <sys/mman.h>
+#include <sys/syscall.h>
 #include <unistd.h>
 #endif
 
@@ -22,12 +24,21 @@ namespace exegesis {
 
 #if defined(__linux__) && !defined(__ANDROID__)
 
+long SubprocessMemory::getCurrentTID() {
+  // We're using the raw syscall here rather than the gettid() function provided
+  // by most libcs for compatibility as gettid() was only added to glibc in
+  // version 2.30.
+  return syscall(SYS_gettid);
+}
+
 Error SubprocessMemory::initializeSubprocessMemory(pid_t ProcessID) {
   // Add the PID to the shared memory name so that if we're running multiple
   // processes at the same time, they won't interfere with each other.
   // This comes up particularly often when running the exegesis tests with
-  // llvm-lit
-  std::string AuxiliaryMemoryName = "/auxmem" + std::to_string(ProcessID);
+  // llvm-lit. Additionally add the TID so that downstream consumers
+  // using multiple threads don't run into conflicts.
+  std::string AuxiliaryMemoryName =
+      formatv("/{0}auxmem{1}", getCurrentTID(), ProcessID);
   int AuxiliaryMemoryFD = shm_open(AuxiliaryMemoryName.c_str(),
                                    O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
   if (AuxiliaryMemoryFD == -1)
@@ -47,8 +58,8 @@ Error SubprocessMemory::addMemoryDefinition(
     pid_t ProcessPID) {
   SharedMemoryNames.reserve(MemoryDefinitions.size());
   for (auto &[Name, MemVal] : MemoryDefinitions) {
-    std::string SharedMemoryName = "/" + std::to_string(ProcessPID) + "memdef" +
-                                   std::to_string(MemVal.Index);
+    std::string SharedMemoryName =
+        formatv("/{0}t{1}memdef{2}", ProcessPID, getCurrentTID(), MemVal.Index);
     SharedMemoryNames.push_back(SharedMemoryName);
     int SharedMemoryFD =
         shm_open(SharedMemoryName.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
@@ -82,8 +93,9 @@ Error SubprocessMemory::addMemoryDefinition(
 
 Expected<int> SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
     std::unordered_map<std::string, MemoryValue> MemoryDefinitions,
-    pid_t ParentPID, int CounterFileDescriptor) {
-  std::string AuxiliaryMemoryName = "/auxmem" + std::to_string(ParentPID);
+    pid_t ParentPID, long ParentTID, int CounterFileDescriptor) {
+  std::string AuxiliaryMemoryName =
+      formatv("/{0}auxmem{1}", ParentTID, ParentPID);
   int AuxiliaryMemoryFileDescriptor =
       shm_open(AuxiliaryMemoryName.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
   if (AuxiliaryMemoryFileDescriptor == -1)
@@ -97,8 +109,8 @@ Expected<int> SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
     return make_error<Failure>("Mapping auxiliary memory failed");
   AuxiliaryMemoryMapping[0] = CounterFileDescriptor;
   for (auto &[Name, MemVal] : MemoryDefinitions) {
-    std::string MemoryValueName = "/" + std::to_string(ParentPID) + "memdef" +
-                                  std::to_string(MemVal.Index);
+    std::string MemoryValueName =
+        formatv("/{0}t{1}memdef{2}", ParentPID, ParentTID, MemVal.Index);
     AuxiliaryMemoryMapping[AuxiliaryMemoryOffset + MemVal.Index] =
         shm_open(MemoryValueName.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
     if (AuxiliaryMemoryMapping[AuxiliaryMemoryOffset + MemVal.Index] == -1)
diff --git a/llvm/tools/llvm-exegesis/lib/SubprocessMemory.h b/llvm/tools/llvm-exegesis/lib/SubprocessMemory.h
index e20b50cdc811..572d1085d9cf 100644
--- a/llvm/tools/llvm-exegesis/lib/SubprocessMemory.h
+++ b/llvm/tools/llvm-exegesis/lib/SubprocessMemory.h
@@ -35,6 +35,9 @@ public:
   static constexpr const size_t AuxiliaryMemoryOffset = 1;
   static constexpr const size_t AuxiliaryMemorySize = 4096;
 
+  // Gets the thread ID for the calling thread.
+  static long getCurrentTID();
+
   Error initializeSubprocessMemory(pid_t ProcessID);
 
   // The following function sets up memory definitions. It creates shared
@@ -54,7 +57,7 @@ public:
   // section.
   static Expected<int> setupAuxiliaryMemoryInSubprocess(
       std::unordered_map<std::string, MemoryValue> MemoryDefinitions,
-      pid_t ParentPID, int CounterFileDescriptor);
+      pid_t ParentPID, long ParentTID, int CounterFileDescriptor);
 
   ~SubprocessMemory();
 
diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SubprocessMemoryTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SubprocessMemoryTest.cpp
index c07ec188a602..7c23e7b7e9c5 100644
--- a/llvm/unittests/tools/llvm-exegesis/X86/SubprocessMemoryTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/SubprocessMemoryTest.cpp
@@ -17,6 +17,7 @@
 #include <endian.h>
 #include <fcntl.h>
 #include <sys/mman.h>
+#include <sys/syscall.h>
 #include <unistd.h>
 #endif // __linux__
 
@@ -49,7 +50,9 @@ protected:
 
   std::string getSharedMemoryName(const unsigned TestNumber,
                                   const unsigned DefinitionNumber) {
-    return "/" + std::to_string(getSharedMemoryNumber(TestNumber)) + "memdef" +
+    long CurrentTID = syscall(SYS_gettid);
+    return "/" + std::to_string(getSharedMemoryNumber(TestNumber)) + "t" +
+           std::to_string(CurrentTID) + "memdef" +
            std::to_string(DefinitionNumber);
   }
 
-- 
cgit v1.2.3


From aefad27096bba513f06162fac2763089578f3de4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Tue, 12 Mar 2024 08:51:45 +0000
Subject: Revert "[llvm-exegesis] Add thread IDs to subprocess memory names
 (#84451)"

This reverts commit 6bbe8a296ee91754d423c59c35727eaa624f7140.

This breaks building LLVM on macOS, failing with

    llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp:146:33: error: out-of-line definition of 'setupAuxiliaryMemoryInSubprocess' does not match any declaration in 'llvm::exegesis::SubprocessMemory'
    Expected<int> SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
---
 llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp   |  9 ++++---
 llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp  | 28 +++++++---------------
 llvm/tools/llvm-exegesis/lib/SubprocessMemory.h    |  5 +---
 .../llvm-exegesis/X86/SubprocessMemoryTest.cpp     |  5 +---
 4 files changed, 14 insertions(+), 33 deletions(-)

diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index 4e97d188d172..5c9848f3c688 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -301,7 +301,6 @@ private:
     if (AddMemDefError)
       return AddMemDefError;
 
-    long ParentTID = SubprocessMemory::getCurrentTID();
     pid_t ParentOrChildPID = fork();
 
     if (ParentOrChildPID == -1) {
@@ -315,7 +314,7 @@ private:
       // Unregister handlers, signal handling is now handled through ptrace in
       // the host process.
       sys::unregisterHandlers();
-      prepareAndRunBenchmark(PipeFiles[0], Key, ParentTID);
+      prepareAndRunBenchmark(PipeFiles[0], Key);
       // The child process terminates in the above function, so we should never
       // get to this point.
       llvm_unreachable("Child process didn't exit when expected.");
@@ -416,8 +415,8 @@ private:
     setrlimit(RLIMIT_CORE, &rlim);
   }
 
-  [[noreturn]] void prepareAndRunBenchmark(int Pipe, const BenchmarkKey &Key,
-                                           long ParentTID) const {
+  [[noreturn]] void prepareAndRunBenchmark(int Pipe,
+                                           const BenchmarkKey &Key) const {
     // Disable core dumps in the child process as otherwise everytime we
     // encounter an execution failure like a segmentation fault, we will create
     // a core dump. We report the information directly rather than require the
@@ -474,7 +473,7 @@ private:
 
     Expected<int> AuxMemFDOrError =
         SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
-            Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor);
+            Key.MemoryValues, ParentPID, CounterFileDescriptor);
     if (!AuxMemFDOrError)
       exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
 
diff --git a/llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp b/llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp
index 11ad72a914c4..a49fa077257d 100644
--- a/llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp
@@ -9,13 +9,11 @@
 #include "SubprocessMemory.h"
 #include "Error.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/FormatVariadic.h"
 #include <cerrno>
 
 #ifdef __linux__
 #include <fcntl.h>
 #include <sys/mman.h>
-#include <sys/syscall.h>
 #include <unistd.h>
 #endif
 
@@ -24,21 +22,12 @@ namespace exegesis {
 
 #if defined(__linux__) && !defined(__ANDROID__)
 
-long SubprocessMemory::getCurrentTID() {
-  // We're using the raw syscall here rather than the gettid() function provided
-  // by most libcs for compatibility as gettid() was only added to glibc in
-  // version 2.30.
-  return syscall(SYS_gettid);
-}
-
 Error SubprocessMemory::initializeSubprocessMemory(pid_t ProcessID) {
   // Add the PID to the shared memory name so that if we're running multiple
   // processes at the same time, they won't interfere with each other.
   // This comes up particularly often when running the exegesis tests with
-  // llvm-lit. Additionally add the TID so that downstream consumers
-  // using multiple threads don't run into conflicts.
-  std::string AuxiliaryMemoryName =
-      formatv("/{0}auxmem{1}", getCurrentTID(), ProcessID);
+  // llvm-lit
+  std::string AuxiliaryMemoryName = "/auxmem" + std::to_string(ProcessID);
   int AuxiliaryMemoryFD = shm_open(AuxiliaryMemoryName.c_str(),
                                    O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
   if (AuxiliaryMemoryFD == -1)
@@ -58,8 +47,8 @@ Error SubprocessMemory::addMemoryDefinition(
     pid_t ProcessPID) {
   SharedMemoryNames.reserve(MemoryDefinitions.size());
   for (auto &[Name, MemVal] : MemoryDefinitions) {
-    std::string SharedMemoryName =
-        formatv("/{0}t{1}memdef{2}", ProcessPID, getCurrentTID(), MemVal.Index);
+    std::string SharedMemoryName = "/" + std::to_string(ProcessPID) + "memdef" +
+                                   std::to_string(MemVal.Index);
     SharedMemoryNames.push_back(SharedMemoryName);
     int SharedMemoryFD =
         shm_open(SharedMemoryName.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
@@ -93,9 +82,8 @@ Error SubprocessMemory::addMemoryDefinition(
 
 Expected<int> SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
     std::unordered_map<std::string, MemoryValue> MemoryDefinitions,
-    pid_t ParentPID, long ParentTID, int CounterFileDescriptor) {
-  std::string AuxiliaryMemoryName =
-      formatv("/{0}auxmem{1}", ParentTID, ParentPID);
+    pid_t ParentPID, int CounterFileDescriptor) {
+  std::string AuxiliaryMemoryName = "/auxmem" + std::to_string(ParentPID);
   int AuxiliaryMemoryFileDescriptor =
       shm_open(AuxiliaryMemoryName.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
   if (AuxiliaryMemoryFileDescriptor == -1)
@@ -109,8 +97,8 @@ Expected<int> SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
     return make_error<Failure>("Mapping auxiliary memory failed");
   AuxiliaryMemoryMapping[0] = CounterFileDescriptor;
   for (auto &[Name, MemVal] : MemoryDefinitions) {
-    std::string MemoryValueName =
-        formatv("/{0}t{1}memdef{2}", ParentPID, ParentTID, MemVal.Index);
+    std::string MemoryValueName = "/" + std::to_string(ParentPID) + "memdef" +
+                                  std::to_string(MemVal.Index);
     AuxiliaryMemoryMapping[AuxiliaryMemoryOffset + MemVal.Index] =
         shm_open(MemoryValueName.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
     if (AuxiliaryMemoryMapping[AuxiliaryMemoryOffset + MemVal.Index] == -1)
diff --git a/llvm/tools/llvm-exegesis/lib/SubprocessMemory.h b/llvm/tools/llvm-exegesis/lib/SubprocessMemory.h
index 572d1085d9cf..e20b50cdc811 100644
--- a/llvm/tools/llvm-exegesis/lib/SubprocessMemory.h
+++ b/llvm/tools/llvm-exegesis/lib/SubprocessMemory.h
@@ -35,9 +35,6 @@ public:
   static constexpr const size_t AuxiliaryMemoryOffset = 1;
   static constexpr const size_t AuxiliaryMemorySize = 4096;
 
-  // Gets the thread ID for the calling thread.
-  static long getCurrentTID();
-
   Error initializeSubprocessMemory(pid_t ProcessID);
 
   // The following function sets up memory definitions. It creates shared
@@ -57,7 +54,7 @@ public:
   // section.
   static Expected<int> setupAuxiliaryMemoryInSubprocess(
       std::unordered_map<std::string, MemoryValue> MemoryDefinitions,
-      pid_t ParentPID, long ParentTID, int CounterFileDescriptor);
+      pid_t ParentPID, int CounterFileDescriptor);
 
   ~SubprocessMemory();
 
diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SubprocessMemoryTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SubprocessMemoryTest.cpp
index 7c23e7b7e9c5..c07ec188a602 100644
--- a/llvm/unittests/tools/llvm-exegesis/X86/SubprocessMemoryTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/SubprocessMemoryTest.cpp
@@ -17,7 +17,6 @@
 #include <endian.h>
 #include <fcntl.h>
 #include <sys/mman.h>
-#include <sys/syscall.h>
 #include <unistd.h>
 #endif // __linux__
 
@@ -50,9 +49,7 @@ protected:
 
   std::string getSharedMemoryName(const unsigned TestNumber,
                                   const unsigned DefinitionNumber) {
-    long CurrentTID = syscall(SYS_gettid);
-    return "/" + std::to_string(getSharedMemoryNumber(TestNumber)) + "t" +
-           std::to_string(CurrentTID) + "memdef" +
+    return "/" + std::to_string(getSharedMemoryNumber(TestNumber)) + "memdef" +
            std::to_string(DefinitionNumber);
   }
 
-- 
cgit v1.2.3


From b274b23665dec30f3ae4fb83ccca8b77e6d3ada3 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Tue, 12 Mar 2024 08:55:03 +0000
Subject: [ValueTracking] Treat phi as underlying obj when not decomposing
 further (#84339)

At the moment, getUnderlyingObjects simply continues for phis that do
not refer to the same underlying object in loops, without adding them to
the list of underlying objects, effectively ignoring those phis.

Instead of ignoring those phis, add them to the list of underlying
objects. This fixes a miscompile where LoopAccessAnalysis fails to
identify a memory dependence, because no underlying objects can be found
for a set of memory accesses.

Fixes https://github.com/llvm/llvm-project/issues/82665.

PR: https://github.com/llvm/llvm-project/pull/84339
---
 llvm/lib/Analysis/ValueTracking.cpp                                | 2 ++
 .../LoopAccessAnalysis/underlying-object-loop-varying-phi.ll       | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index d7f60d85b452..371ad41ee965 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6131,6 +6131,8 @@ void llvm::getUnderlyingObjects(const Value *V,
       if (!LI || !LI->isLoopHeader(PN->getParent()) ||
           isSameUnderlyingObjectInLoop(PN, LI))
         append_range(Worklist, PN->incoming_values());
+      else
+        Objects.push_back(P);
       continue;
     }
 
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll b/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll
index 1a5a6ac08d40..106dc8c13a49 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll
@@ -7,8 +7,13 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 define void @indirect_ptr_recurrences_read_write(ptr %A, ptr %B) {
 ; CHECK-LABEL: 'indirect_ptr_recurrences_read_write'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
 ; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndidrectUnsafe:
+; CHECK-NEXT:            %l = load i32, ptr %ptr.recur, align 4, !tbaa !4 ->
+; CHECK-NEXT:            store i32 %xor, ptr %ptr.recur, align 4, !tbaa !4
+; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-EMPTY:
-- 
cgit v1.2.3


From 939f038296e601abd9143955f1b347aee1e99c06 Mon Sep 17 00:00:00 2001
From: jeanPerier <jperier@nvidia.com>
Date: Tue, 12 Mar 2024 10:29:19 +0100
Subject: [flang] lower vector subscripted polymorphic designators (#84778)

A mold argument need to be added to the hlfir.element_addr and set in
lowering so that when the hlfir.element_addr need to be turned into an
hlfir.elemental operation because the designator must be turned into a
value, the mold can be set on the hlfir.elemental to later allocate the
temporary according the the dynamic type.

This situation happens whenever the vector subscripted polymorphic
designator does not appear as an assignment left-hand side, or as an
IO-input item.


I initially thought retrieving the mold would be tricky if the dynamic
type of the designator was set by a part-ref of the right of the vector
subscripts ("array(vector)%polymorphic_comp"), but this turned out to be
impossible because:
1. A derived type component can be polymorphic only if it has the
POINTER or ALLOCATABLE attribute (F2023 C708).
2. Vector-subscripted part are ranked and F2023 C919 prohibits any
part-ref on the right of the rank part to have the POINTER or
ALLOCATABLE attribute.

=> If a vector subscripted designator is polymorphic, the vector
subscripted part is the rightmost part, and the mold is the base of the
vector subscripted part. This makes the retrieval of the mold easy in
lowering. The mold argument is always set to be the base of the vector
subscripted part when lowering the vector subscripted part, and it is
removed at the end of the designator lowering if the designator is not
polymorphic. This way there is no need to find back the mold from the
inside of the hlfir.element_addr body.
---
 flang/include/flang/Optimizer/HLFIR/HLFIROps.td    | 13 ++++--
 flang/lib/Lower/ConvertExprToHLFIR.cpp             | 27 ++++++------
 flang/lib/Optimizer/Builder/HLFIRTools.cpp         |  6 +--
 flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp          | 50 ++++++++++++----------
 flang/test/HLFIR/element-addr.fir                  | 42 ++++++++++++++++++
 .../test/Lower/HLFIR/vector-subscript-as-value.f90 | 36 +++++++++++++++-
 6 files changed, 132 insertions(+), 42 deletions(-)

diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
index c82eae154d31..743a6c98ec1a 100644
--- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
+++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
@@ -1358,7 +1358,9 @@ def hlfir_YieldOp : hlfir_Op<"yield", [Terminator, ParentOneOf<["RegionAssignOp"
   let assemblyFormat = "$entity attr-dict `:` type($entity) custom<YieldOpCleanup>($cleanup)";
 }
 
-def hlfir_ElementalAddrOp : hlfir_Op<"elemental_addr", [Terminator, HasParent<"RegionAssignOp">, RecursiveMemoryEffects, RecursivelySpeculatable, hlfir_ElementalOpInterface]> {
+def hlfir_ElementalAddrOp : hlfir_Op<"elemental_addr", [Terminator, HasParent<"RegionAssignOp">,
+    RecursiveMemoryEffects, RecursivelySpeculatable, hlfir_ElementalOpInterface,
+    AttrSizedOperandSegments]> {
   let summary = "Yield the address of a vector subscripted variable inside an hlfir.region_assign";
   let description = [{
     Special terminator node for the left-hand side region of an hlfir.region_assign
@@ -1398,6 +1400,7 @@ def hlfir_ElementalAddrOp : hlfir_Op<"elemental_addr", [Terminator, HasParent<"R
 
   let arguments = (ins
     fir_ShapeType:$shape,
+    Optional<AnyPolymorphicObject>:$mold,
     Variadic<AnyIntegerType>:$typeparams,
     OptionalAttr<UnitAttr>:$unordered
   );
@@ -1406,11 +1409,15 @@ def hlfir_ElementalAddrOp : hlfir_Op<"elemental_addr", [Terminator, HasParent<"R
                          MaxSizedRegion<1>:$cleanup);
 
   let builders = [
-    OpBuilder<(ins "mlir::Value":$shape, CArg<"bool", "false">:$isUnordered)>
+    OpBuilder<(ins "mlir::Value":$shape,
+          CArg<"mlir::Value", "{}">:$mold,
+      CArg<"mlir::ValueRange", "{}">:$typeparams,
+      CArg<"bool", "false">:$isUnordered)>
   ];
 
   let assemblyFormat = [{
-    $shape (`typeparams` $typeparams^)? (`unordered` $unordered^)?
+    $shape (`mold` $mold^)? (`typeparams` $typeparams^)?
+    (`unordered` $unordered^)?
     attr-dict `:` type(operands) $body
     custom<YieldOpCleanup>($cleanup)}];
 
diff --git a/flang/lib/Lower/ConvertExprToHLFIR.cpp b/flang/lib/Lower/ConvertExprToHLFIR.cpp
index 731c5072c45c..c5bfbdf6b8c1 100644
--- a/flang/lib/Lower/ConvertExprToHLFIR.cpp
+++ b/flang/lib/Lower/ConvertExprToHLFIR.cpp
@@ -761,9 +761,17 @@ private:
     // of the whole designator (not the ones of the vector subscripted part).
     // These are not yet known and will be added when finalizing the designator
     // lowering.
-    auto elementalAddrOp =
-        builder.create<hlfir::ElementalAddrOp>(loc, shape,
-                                               /*isUnordered=*/true);
+    // The resulting designator may be polymorphic, in which case the resulting
+    // type is the base of the vector subscripted part because
+    // allocatable/pointer components cannot be referenced after a vector
+    // subscripted part. Set the mold to the current base. It will be erased if
+    // the resulting designator is not polymorphic.
+    assert(partInfo.base.has_value() &&
+           "vector subscripted part must have a base");
+    mlir::Value mold = *partInfo.base;
+    auto elementalAddrOp = builder.create<hlfir::ElementalAddrOp>(
+        loc, shape, mold, mlir::ValueRange{},
+        /*isUnordered=*/true);
     setVectorSubscriptElementAddrOp(elementalAddrOp);
     builder.setInsertionPointToEnd(&elementalAddrOp.getBody().front());
     mlir::Region::BlockArgListType indices = elementalAddrOp.getIndices();
@@ -804,15 +812,8 @@ private:
                              hlfir::EntityWithAttributes elementAddr) {
     fir::FirOpBuilder &builder = getBuilder();
     builder.setInsertionPointToEnd(&elementalAddrOp.getBody().front());
-    // For polymorphic entities, it will be needed to add a mold on the
-    // hlfir.elemental so that we are able to create temporary storage
-    // for it using the dynamic type. It seems that a reference to the mold
-    // entity can be created by evaluating the hlfir.elemental_addr
-    // for a single index. The evaluation should be legal as long as
-    // the hlfir.elemental_addr has no side effects, otherwise,
-    // it is not clear how to get the mold reference.
-    if (elementAddr.isPolymorphic())
-      TODO(loc, "vector subscripted polymorphic entity in HLFIR");
+    if (!elementAddr.isPolymorphic())
+      elementalAddrOp.getMoldMutable().clear();
     builder.create<hlfir::YieldOp>(loc, elementAddr);
     builder.setInsertionPointAfter(elementalAddrOp);
   }
@@ -929,6 +930,8 @@ HlfirDesignatorBuilder::convertVectorSubscriptedExprToElementalAddr(
   hlfir::genLengthParameters(loc, builder, elementAddrEntity, lengths);
   if (!lengths.empty())
     elementalAddrOp.getTypeparamsMutable().assign(lengths);
+  if (!elementAddrEntity.isPolymorphic())
+    elementalAddrOp.getMoldMutable().clear();
   // Create the hlfir.yield terminator inside the hlfir.elemental_body.
   builder.setInsertionPointToEnd(&elementalAddrOp.getBody().front());
   builder.create<hlfir::YieldOp>(loc, elementAddrEntity);
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 0e0b14e8d690..c7a550814e1d 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -1036,9 +1036,9 @@ hlfir::cloneToElementalOp(mlir::Location loc, fir::FirOpBuilder &builder,
     return hlfir::loadTrivialScalar(l, b, newAddr);
   };
   mlir::Type elementType = scalarAddress.getFortranElementType();
-  return hlfir::genElementalOp(loc, builder, elementType,
-                               elementalAddrOp.getShape(), typeParams,
-                               genKernel, !elementalAddrOp.isOrdered());
+  return hlfir::genElementalOp(
+      loc, builder, elementType, elementalAddrOp.getShape(), typeParams,
+      genKernel, !elementalAddrOp.isOrdered(), elementalAddrOp.getMold());
 }
 
 bool hlfir::elementalOpMustProduceTemp(hlfir::ElementalOp elemental) {
diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
index 3568fe202caf..8bad4e445082 100644
--- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
+++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
@@ -1406,33 +1406,45 @@ void hlfir::AsExprOp::getEffects(
 // ElementalOp
 //===----------------------------------------------------------------------===//
 
-void hlfir::ElementalOp::build(mlir::OpBuilder &builder,
-                               mlir::OperationState &odsState,
-                               mlir::Type resultType, mlir::Value shape,
-                               mlir::Value mold, mlir::ValueRange typeparams,
-                               bool isUnordered) {
+/// Common builder for ElementalOp and ElementalAddrOp to add the arguments and
+/// create the elemental body. Result and clean-up body must be handled in
+/// specific builders.
+template <typename Op>
+static void buildElemental(mlir::OpBuilder &builder,
+                           mlir::OperationState &odsState, mlir::Value shape,
+                           mlir::Value mold, mlir::ValueRange typeparams,
+                           bool isUnordered) {
   odsState.addOperands(shape);
   if (mold)
     odsState.addOperands(mold);
   odsState.addOperands(typeparams);
-  odsState.addTypes(resultType);
   odsState.addAttribute(
-      getOperandSegmentSizesAttrName(odsState.name),
+      Op::getOperandSegmentSizesAttrName(odsState.name),
       builder.getDenseI32ArrayAttr({/*shape=*/1, (mold ? 1 : 0),
                                     static_cast<int32_t>(typeparams.size())}));
   if (isUnordered)
-    odsState.addAttribute(getUnorderedAttrName(odsState.name),
+    odsState.addAttribute(Op::getUnorderedAttrName(odsState.name),
                           isUnordered ? builder.getUnitAttr() : nullptr);
   mlir::Region *bodyRegion = odsState.addRegion();
   bodyRegion->push_back(new mlir::Block{});
-  if (auto exprType = resultType.dyn_cast<hlfir::ExprType>()) {
-    unsigned dim = exprType.getRank();
+  if (auto shapeType = shape.getType().dyn_cast<fir::ShapeType>()) {
+    unsigned dim = shapeType.getRank();
     mlir::Type indexType = builder.getIndexType();
     for (unsigned d = 0; d < dim; ++d)
       bodyRegion->front().addArgument(indexType, odsState.location);
   }
 }
 
+void hlfir::ElementalOp::build(mlir::OpBuilder &builder,
+                               mlir::OperationState &odsState,
+                               mlir::Type resultType, mlir::Value shape,
+                               mlir::Value mold, mlir::ValueRange typeparams,
+                               bool isUnordered) {
+  odsState.addTypes(resultType);
+  buildElemental<hlfir::ElementalOp>(builder, odsState, shape, mold, typeparams,
+                                     isUnordered);
+}
+
 mlir::Value hlfir::ElementalOp::getElementEntity() {
   return mlir::cast<hlfir::YieldElementOp>(getBody()->back()).getElementValue();
 }
@@ -1681,19 +1693,11 @@ static void printYieldOpCleanup(mlir::OpAsmPrinter &p, YieldOp yieldOp,
 
 void hlfir::ElementalAddrOp::build(mlir::OpBuilder &builder,
                                    mlir::OperationState &odsState,
-                                   mlir::Value shape, bool isUnordered) {
-  odsState.addOperands(shape);
-  if (isUnordered)
-    odsState.addAttribute(getUnorderedAttrName(odsState.name),
-                          isUnordered ? builder.getUnitAttr() : nullptr);
-  mlir::Region *bodyRegion = odsState.addRegion();
-  bodyRegion->push_back(new mlir::Block{});
-  if (auto shapeType = shape.getType().dyn_cast<fir::ShapeType>()) {
-    unsigned dim = shapeType.getRank();
-    mlir::Type indexType = builder.getIndexType();
-    for (unsigned d = 0; d < dim; ++d)
-      bodyRegion->front().addArgument(indexType, odsState.location);
-  }
+                                   mlir::Value shape, mlir::Value mold,
+                                   mlir::ValueRange typeparams,
+                                   bool isUnordered) {
+  buildElemental<hlfir::ElementalAddrOp>(builder, odsState, shape, mold,
+                                         typeparams, isUnordered);
   // Push cleanUp region.
   odsState.addRegion();
 }
diff --git a/flang/test/HLFIR/element-addr.fir b/flang/test/HLFIR/element-addr.fir
index 73946f8b40e3..c3c48edd9b56 100644
--- a/flang/test/HLFIR/element-addr.fir
+++ b/flang/test/HLFIR/element-addr.fir
@@ -114,3 +114,45 @@ func.func @unordered() {
 // CHECK:           }
 // CHECK:           return
 // CHECK:         }
+
+// "X(VECTOR) = Y" with polymorphic X and Y and user defined assignment.
+func.func @test_mold(%x: !fir.class<!fir.array<?x!fir.type<t>>>, %y: !fir.class<!fir.array<?x!fir.type<t>>>, %vector: !fir.box<!fir.array<?xi64>>) {
+  hlfir.region_assign {
+    hlfir.yield %y : !fir.class<!fir.array<?x!fir.type<t>>>
+  } to {
+    %c0 = arith.constant 0 : index
+    %0:3 = fir.box_dims %vector, %c0 : (!fir.box<!fir.array<?xi64>>, index) -> (index, index, index)
+    %1 = fir.shape %0#1 : (index) -> !fir.shape<1>
+    hlfir.elemental_addr %1 mold %x unordered : !fir.shape<1>, !fir.class<!fir.array<?x!fir.type<t>>> {
+    ^bb0(%arg3: index):
+      %2 = hlfir.designate %vector (%arg3)  : (!fir.box<!fir.array<?xi64>>, index) -> !fir.ref<i64>
+      %3 = fir.load %2 : !fir.ref<i64>
+      %4 = hlfir.designate %x (%3)  : (!fir.class<!fir.array<?x!fir.type<t>>>, i64) -> !fir.class<!fir.type<t>>
+      hlfir.yield %4 : !fir.class<!fir.type<t>>
+    }
+  } user_defined_assign  (%arg3: !fir.class<!fir.type<t>>) to (%arg4: !fir.class<!fir.type<t>>) {
+    fir.call @user_def_assign(%arg4, %arg3) : (!fir.class<!fir.type<t>>, !fir.class<!fir.type<t>>) -> ()
+  }
+  return
+}
+func.func private @user_def_assign(!fir.class<!fir.type<t>>, !fir.class<!fir.type<t>>)
+// CHECK-LABEL: func.func @test_mold(
+// CHECK-SAME:                       %[[VAL_0:[^:]*]]: !fir.class<!fir.array<?x!fir.type<t>>>,
+// CHECK-SAME:                       %[[VAL_1:.*]]: !fir.class<!fir.array<?x!fir.type<t>>>,
+// CHECK-SAME:                       %[[VAL_2:.*]]: !fir.box<!fir.array<?xi64>>) {
+// CHECK:         hlfir.region_assign {
+// CHECK:           hlfir.yield %[[VAL_1]] : !fir.class<!fir.array<?x!fir.type<t>>>
+// CHECK:         } to {
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_3]] : (!fir.box<!fir.array<?xi64>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_5:.*]] = fir.shape %[[VAL_4]]#1 : (index) -> !fir.shape<1>
+// CHECK:           hlfir.elemental_addr %[[VAL_5]] mold %[[VAL_0]] unordered : !fir.shape<1>, !fir.class<!fir.array<?x!fir.type<t>>> {
+// CHECK:           ^bb0(%[[VAL_6:.*]]: index):
+// CHECK:             %[[VAL_7:.*]] = hlfir.designate %[[VAL_2]] (%[[VAL_6]])  : (!fir.box<!fir.array<?xi64>>, index) -> !fir.ref<i64>
+// CHECK:             %[[VAL_8:.*]] = fir.load %[[VAL_7]] : !fir.ref<i64>
+// CHECK:             %[[VAL_9:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_8]])  : (!fir.class<!fir.array<?x!fir.type<t>>>, i64) -> !fir.class<!fir.type<t>>
+// CHECK:             hlfir.yield %[[VAL_9]] : !fir.class<!fir.type<t>>
+// CHECK:           }
+// CHECK:         } user_defined_assign  (%[[VAL_10:.*]]: !fir.class<!fir.type<t>>) to (%[[VAL_11:.*]]: !fir.class<!fir.type<t>>) {
+// CHECK:           fir.call @user_def_assign(%[[VAL_11]], %[[VAL_10]]) : (!fir.class<!fir.type<t>>, !fir.class<!fir.type<t>>) -> ()
+// CHECK:         }
diff --git a/flang/test/Lower/HLFIR/vector-subscript-as-value.f90 b/flang/test/Lower/HLFIR/vector-subscript-as-value.f90
index 2f463cfaa8b0..d4026a37720f 100644
--- a/flang/test/Lower/HLFIR/vector-subscript-as-value.f90
+++ b/flang/test/Lower/HLFIR/vector-subscript-as-value.f90
@@ -1,6 +1,6 @@
 ! Test lowering of vector subscript designators outside of the
 ! assignment left-and side and input IO context.
-! RUN: bbc -emit-hlfir -o - -I nw %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -o - -I nw %s --polymorphic-type 2>&1 | FileCheck %s
 
 subroutine foo(x, y)
   integer :: x(100)
@@ -182,3 +182,37 @@ end subroutine
 ! CHECK:    %[[VAL_27:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_26]]) substr %[[VAL_15]], %[[VAL_16]]  typeparams %[[VAL_22]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, i64, index, index, index) -> !fir.boxchar<1>
 ! CHECK:    hlfir.yield_element %[[VAL_27]] : !fir.boxchar<1>
 ! CHECK:  }
+
+subroutine test_passing_subscripted_poly(x, vector)
+  interface
+    subroutine do_something(x)
+      class(*) :: x(:)
+    end subroutine
+  end interface
+  class(*) :: x(:, :)
+  integer(8) :: vector(:)
+  call do_something(x(314, vector))
+end subroutine
+! CHECK-LABEL:   func.func @_QPtest_passing_subscripted_poly(
+! CHECK-SAME:                                                %[[VAL_0:.*]]: !fir.class<!fir.array<?x?xnone>>
+! CHECK-SAME:                                                %[[VAL_1:.*]]: !fir.box<!fir.array<?xi64>>
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFtest_passing_subscripted_polyEvector"} : (!fir.box<!fir.array<?xi64>>) -> (!fir.box<!fir.array<?xi64>>, !fir.box<!fir.array<?xi64>>)
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFtest_passing_subscripted_polyEx"} : (!fir.class<!fir.array<?x?xnone>>) -> (!fir.class<!fir.array<?x?xnone>>, !fir.class<!fir.array<?x?xnone>>)
+! CHECK:           %[[VAL_4:.*]] = arith.constant 314 : index
+! CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_2]]#0, %[[VAL_5]] : (!fir.box<!fir.array<?xi64>>, index) -> (index, index, index)
+! CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_6]]#1 : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] mold %[[VAL_3]]#0 unordered : (!fir.shape<1>, !fir.class<!fir.array<?x?xnone>>) -> !hlfir.expr<?xnone?> {
+! CHECK:           ^bb0(%[[VAL_9:.*]]: index):
+! CHECK:             %[[VAL_10:.*]] = hlfir.designate %[[VAL_2]]#0 (%[[VAL_9]])  : (!fir.box<!fir.array<?xi64>>, index) -> !fir.ref<i64>
+! CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i64>
+! CHECK:             %[[VAL_12:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_4]], %[[VAL_11]])  : (!fir.class<!fir.array<?x?xnone>>, index, i64) -> !fir.class<none>
+! CHECK:             hlfir.yield_element %[[VAL_12]] : !fir.class<none>
+! CHECK:           }
+! CHECK:           %[[VAL_13:.*]]:3 = hlfir.associate %[[VAL_8]](%[[VAL_7]]) {adapt.valuebyref} : (!hlfir.expr<?xnone?>, !fir.shape<1>) -> (!fir.class<!fir.heap<!fir.array<?xnone>>>, !fir.class<!fir.heap<!fir.array<?xnone>>>, i1)
+! CHECK:           %[[VAL_14:.*]] = fir.rebox %[[VAL_13]]#0 : (!fir.class<!fir.heap<!fir.array<?xnone>>>) -> !fir.class<!fir.array<?xnone>>
+! CHECK:           fir.call @_QPdo_something(%[[VAL_14]]) fastmath<contract> : (!fir.class<!fir.array<?xnone>>) -> ()
+! CHECK:           hlfir.end_associate %[[VAL_13]]#0, %[[VAL_13]]#2 : !fir.class<!fir.heap<!fir.array<?xnone>>>, i1
+! CHECK:           hlfir.destroy %[[VAL_8]] : !hlfir.expr<?xnone?>
+! CHECK:           return
+! CHECK:         }
-- 
cgit v1.2.3


From 9d16e79aac09e68c46b89cbbad9fe7edd915b8c3 Mon Sep 17 00:00:00 2001
From: Dani <daniel.kiss@arm.com>
Date: Tue, 12 Mar 2024 10:33:16 +0100
Subject: [AArch64] Fix COMPILER_RT_HAS_AUXV for builtins. (#84816)

COMPILER_RT_HAS_AUXV is used now in builtins so the test need to be in
the builtin-config-ix.cmake too.
---
 compiler-rt/cmake/builtin-config-ix.cmake | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index d10222b7530a..33c97b1ac28a 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -1,4 +1,5 @@
 include(BuiltinTests)
+include(CheckIncludeFiles)
 include(CheckCSourceCompiles)
 
 # Make all the tests only check the compiler
@@ -43,6 +44,8 @@ void foo(void)  __arm_streaming_compatible {
 }
 ")
 
+check_include_files("sys/auxv.h"    COMPILER_RT_HAS_AUXV)
+
 if(ANDROID)
   set(OS_NAME "Android")
 else()
-- 
cgit v1.2.3


From 368db5683bb9f8c619a8a6d3d15522429ef615c6 Mon Sep 17 00:00:00 2001
From: Luke Weiler <163067703+lwmaia@users.noreply.github.com>
Date: Tue, 12 Mar 2024 02:38:36 -0700
Subject: [lldb] Fix build break on windows (#84863)

This is a one line fix for a Windows specific (I believe) build break.

The build failure looks like this:
`D:\a\_work\1\s\lldb\source\Symbol\Symtab.cpp(128): error C2440:
'<function-style-cast>': cannot convert from 'lldb_private::ConstString'
to 'llvm::StringRef'
D:\a\_work\1\s\lldb\source\Symbol\Symtab.cpp(128): note:
'llvm::StringRef::StringRef': ambiguous call to overloaded function
D:\a\_work\1\s\llvm\include\llvm/ADT/StringRef.h(840): note: could be
'llvm::StringRef::StringRef(llvm::StringRef &&)'
D:\a\_work\1\s\llvm\include\llvm/ADT/StringRef.h(104): note: or
'llvm::StringRef::StringRef(std::string_view)'
D:\a\_work\1\s\lldb\source\Symbol\Symtab.cpp(128): note: while trying to
match the argument list '(lldb_private::ConstString)'
D:\a\_work\1\s\lldb\source\Symbol\Symtab.cpp(128): error C2672:
'std::multimap<llvm::StringRef,const lldb_private::Symbol
*,std::less<llvm::StringRef>,std::allocator<std::pair<const
llvm::StringRef,const lldb_private::Symbol *>>>::emplace': no matching
overloaded function found
C:\Program Files\Microsoft Visual
Studio\2022\Enterprise\VC\Tools\MSVC\14.37.32822\include\map(557): note:
could be
'std::_Tree_iterator<std::_Tree_val<std::_Tree_simple_types<std::pair<const
llvm::StringRef,const lldb_private::Symbol *>>>>
std::multimap<llvm::StringRef,const lldb_private::Symbol
*,std::less<llvm::StringRef>,std::allocator<std::pair<const
llvm::StringRef,const lldb_private::Symbol *>>>::emplace(_Valty &&...)'
`

The StringRef constructor here is intended to take a ConstString object,
which I assume is implicitly converted to a std::string_view by
compilers other than Visual Studio's. To fix the VS build I made the
StringRef initialization more explicit, as you can see in the diff.
---
 lldb/source/Symbol/Symtab.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp
index c63bbe94fece..5b5bf5c3f6f8 100644
--- a/lldb/source/Symbol/Symtab.cpp
+++ b/lldb/source/Symbol/Symtab.cpp
@@ -125,7 +125,7 @@ void Symtab::Dump(Stream *s, Target *target, SortOrder sort_order,
 
       std::multimap<llvm::StringRef, const Symbol *> name_map;
       for (const Symbol &symbol : m_symbols)
-        name_map.emplace(llvm::StringRef(symbol.GetName()), &symbol);
+        name_map.emplace(symbol.GetName().GetStringRef(), &symbol);
 
       for (const auto &name_to_symbol : name_map) {
         const Symbol *symbol = name_to_symbol.second;
-- 
cgit v1.2.3


From a3b52509d522442915a51d8aabcec1df49e95b23 Mon Sep 17 00:00:00 2001
From: Andreas Jonson <andjo403@hotmail.com>
Date: Tue, 12 Mar 2024 10:39:37 +0100
Subject: [InstSimpliy] Use range attribute to simplify comparisons (#84627)

Use the new range attribute from https://github.com/llvm/llvm-project/pull/84617
to simplify comparisons where both sides have range information.
---
 llvm/include/llvm/IR/Attributes.h              |   5 +
 llvm/include/llvm/IR/Function.h                |   3 +
 llvm/include/llvm/IR/InstrTypes.h              |  12 +++
 llvm/lib/Analysis/InstructionSimplify.cpp      |  38 ++++---
 llvm/lib/IR/Function.cpp                       |   4 +
 llvm/test/Transforms/InstCombine/icmp-range.ll | 144 ++++++++++++++++++++++++-
 6 files changed, 187 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index 0c2a02514ba0..7dd8a329029a 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -848,6 +848,11 @@ public:
     return getAttributeAtIndex(FunctionIndex, Kind);
   }
 
+  /// Return the attribute for the given attribute kind for the return value.
+  Attribute getRetAttr(Attribute::AttrKind Kind) const {
+    return getAttributeAtIndex(ReturnIndex, Kind);
+  }
+
   /// Return the alignment of the return value.
   MaybeAlign getRetAlignment() const;
 
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index cb87a4498032..d96d506a9b05 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -430,6 +430,9 @@ public:
   /// Return the attribute for the given attribute kind.
   Attribute getFnAttribute(StringRef Kind) const;
 
+  /// Return the attribute for the given attribute kind for the return value.
+  Attribute getRetAttribute(Attribute::AttrKind Kind) const;
+
   /// For a string attribute \p Kind, parse attribute as an integer.
   ///
   /// \returns \p Default if attribute is not present.
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 0e81d3b391a0..fed21b992e3d 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1909,6 +1909,18 @@ public:
   /// Determine whether the return value has the given attribute.
   bool hasRetAttr(StringRef Kind) const { return hasRetAttrImpl(Kind); }
 
+  /// Return the attribute for the given attribute kind for the return value.
+  Attribute getRetAttr(Attribute::AttrKind Kind) const {
+    Attribute RetAttr = Attrs.getRetAttr(Kind);
+    if (RetAttr.isValid())
+      return RetAttr;
+
+    // Look at the callee, if available.
+    if (const Function *F = getCalledFunction())
+      return F->getAttributes().getRetAttr(Kind);
+    return Attribute();
+  }
+
   /// Determine whether the argument or parameter has the given attribute.
   bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const;
 
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 8c48174b9f52..ce651783caf1 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -3729,6 +3729,26 @@ static Value *simplifyICmpWithIntrinsicOnLHS(CmpInst::Predicate Pred,
   }
 }
 
+/// Helper method to get range from metadata or attribute.
+static std::optional<ConstantRange> getRange(Value *V,
+                                             const InstrInfoQuery &IIQ) {
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    if (MDNode *MD = IIQ.getMetadata(I, LLVMContext::MD_range))
+      return getConstantRangeFromMetadata(*MD);
+
+  Attribute Range;
+  if (const Argument *A = dyn_cast<Argument>(V)) {
+    Range = A->getAttribute(llvm::Attribute::Range);
+  } else if (const CallBase *CB = dyn_cast<CallBase>(V)) {
+    Range = CB->getRetAttr(llvm::Attribute::Range);
+  }
+
+  if (Range.isValid())
+    return Range.getRange();
+
+  return std::nullopt;
+}
+
 /// Given operands for an ICmpInst, see if we can fold the result.
 /// If not, this returns null.
 static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -3776,24 +3796,14 @@ static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
 
   // If both operands have range metadata, use the metadata
   // to simplify the comparison.
-  if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) {
-    auto RHS_Instr = cast<Instruction>(RHS);
-    auto LHS_Instr = cast<Instruction>(LHS);
-
-    if (Q.IIQ.getMetadata(RHS_Instr, LLVMContext::MD_range) &&
-        Q.IIQ.getMetadata(LHS_Instr, LLVMContext::MD_range)) {
-      auto RHS_CR = getConstantRangeFromMetadata(
-          *RHS_Instr->getMetadata(LLVMContext::MD_range));
-      auto LHS_CR = getConstantRangeFromMetadata(
-          *LHS_Instr->getMetadata(LLVMContext::MD_range));
-
-      if (LHS_CR.icmp(Pred, RHS_CR))
+  if (std::optional<ConstantRange> RhsCr = getRange(RHS, Q.IIQ))
+    if (std::optional<ConstantRange> LhsCr = getRange(LHS, Q.IIQ)) {
+      if (LhsCr->icmp(Pred, *RhsCr))
         return ConstantInt::getTrue(ITy);
 
-      if (LHS_CR.icmp(CmpInst::getInversePredicate(Pred), RHS_CR))
+      if (LhsCr->icmp(CmpInst::getInversePredicate(Pred), *RhsCr))
         return ConstantInt::getFalse(ITy);
     }
-  }
 
   // Compare of cast, for example (zext X) != 0 -> X != 0
   if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 056e4f31981a..d22e1c123111 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -700,6 +700,10 @@ Attribute Function::getFnAttribute(StringRef Kind) const {
   return AttributeSets.getFnAttr(Kind);
 }
 
+Attribute Function::getRetAttribute(Attribute::AttrKind Kind) const {
+  return AttributeSets.getRetAttr(Kind);
+}
+
 uint64_t Function::getFnAttributeAsParsedInteger(StringRef Name,
                                                  uint64_t Default) const {
   Attribute A = getFnAttribute(Name);
diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll
index 77bb5fdb6bfd..9ed2f2a4860c 100644
--- a/llvm/test/Transforms/InstCombine/icmp-range.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-range.ll
@@ -149,6 +149,16 @@ define i1 @test_two_ranges(ptr nocapture readonly %arg1, ptr nocapture readonly
   ret i1 %rval
 }
 
+; Values' ranges overlap each other, so it can not be simplified.
+define i1 @test_two_attribute_ranges(i32 range(i32 5, 10) %arg1, i32 range(i32 8, 16) %arg2) {
+; CHECK-LABEL: @test_two_attribute_ranges(
+; CHECK-NEXT:    [[RVAL:%.*]] = icmp ult i32 [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT:    ret i1 [[RVAL]]
+;
+  %rval = icmp ult i32 %arg2, %arg1
+  ret i1 %rval
+}
+
 ; Values' ranges do not overlap each other, so it can simplified to false.
 define i1 @test_two_ranges2(ptr nocapture readonly %arg1, ptr nocapture readonly %arg2) {
 ; CHECK-LABEL: @test_two_ranges2(
@@ -160,6 +170,35 @@ define i1 @test_two_ranges2(ptr nocapture readonly %arg1, ptr nocapture readonly
   ret i1 %rval
 }
 
+; Values' ranges do not overlap each other, so it can simplified to false.
+define i1 @test_two_argument_ranges(i32 range(i32 1, 6) %arg1, i32 range(i32 8, 16) %arg2) {
+; CHECK-LABEL: @test_two_argument_ranges(
+; CHECK-NEXT:    ret i1 false
+;
+  %rval = icmp ult i32 %arg2, %arg1
+  ret i1 %rval
+}
+
+; Values' ranges do not overlap each other, so it can simplified to false.
+define i1 @test_one_range_and_one_argument_range(ptr nocapture readonly %arg1, i32 range(i32 8, 16) %arg2) {
+; CHECK-LABEL: @test_one_range_and_one_argument_range(
+; CHECK-NEXT:    ret i1 false
+;
+  %val1 = load i32, ptr %arg1, !range !0
+  %rval = icmp ult i32 %arg2, %val1
+  ret i1 %rval
+}
+
+; Values' ranges do not overlap each other, so it can simplified to false.
+define i1 @test_one_argument_range_and_one_range(i32 range(i32 1, 6) %arg1, ptr nocapture readonly %arg2) {
+; CHECK-LABEL: @test_one_argument_range_and_one_range(
+; CHECK-NEXT:    ret i1 false
+;
+  %val1 = load i32, ptr %arg2, !range !6
+  %rval = icmp ult i32 %val1, %arg1
+  ret i1 %rval
+}
+
 ; Values' ranges do not overlap each other, so it can simplified to true.
 define i1 @test_two_ranges3(ptr nocapture readonly %arg1, ptr nocapture readonly %arg2) {
 ; CHECK-LABEL: @test_two_ranges3(
@@ -186,8 +225,8 @@ define <2 x i1> @test_two_ranges_vec(ptr nocapture readonly %arg1, ptr nocapture
 }
 
 ; Values' ranges do not overlap each other, so it can simplified to false.
-define <2 x i1> @test_two_ranges_vec_true(ptr nocapture readonly %arg1, ptr nocapture readonly %arg2) {
-; CHECK-LABEL: @test_two_ranges_vec_true(
+define <2 x i1> @test_two_ranges_vec_false(ptr nocapture readonly %arg1, ptr nocapture readonly %arg2) {
+; CHECK-LABEL: @test_two_ranges_vec_false(
 ; CHECK-NEXT:    ret <2 x i1> zeroinitializer
 ;
   %val1 = load <2 x i32>, ptr %arg1, !range !0
@@ -196,9 +235,9 @@ define <2 x i1> @test_two_ranges_vec_true(ptr nocapture readonly %arg1, ptr noca
   ret <2 x i1> %rval
 }
 
-; Values' ranges do not overlap each other, so it can simplified to false.
-define <2 x i1> @test_two_ranges_vec_false(ptr nocapture readonly %arg1, ptr nocapture readonly %arg2) {
-; CHECK-LABEL: @test_two_ranges_vec_false(
+; Values' ranges do not overlap each other, so it can simplified to true.
+define <2 x i1> @test_two_ranges_vec_true(ptr nocapture readonly %arg1, ptr nocapture readonly %arg2) {
+; CHECK-LABEL: @test_two_ranges_vec_true(
 ; CHECK-NEXT:    ret <2 x i1> <i1 true, i1 true>
 ;
   %val1 = load <2 x i32>, ptr %arg1, !range !0
@@ -207,6 +246,101 @@ define <2 x i1> @test_two_ranges_vec_false(ptr nocapture readonly %arg1, ptr noc
   ret <2 x i1> %rval
 }
 
+; Values' ranges overlap each other, so it can not be simplified.
+define <2 x i1> @test_two_argument_ranges_vec(<2 x i32> range(i32 5, 10) %arg1, <2 x i32> range(i32 8, 16) %arg2) {
+; CHECK-LABEL: @test_two_argument_ranges_vec(
+; CHECK-NEXT:    [[RVAL:%.*]] = icmp ult <2 x i32> [[VAL2:%.*]], [[VAL1:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[RVAL]]
+;
+  %rval = icmp ult <2 x i32> %arg2, %arg1
+  ret <2 x i1> %rval
+}
+
+; Values' ranges do not overlap each other, so it can simplified to false.
+define <2 x i1> @test_two_argument_ranges_vec_false(<2 x i32> range(i32 1, 6) %arg1, <2 x i32> range(i32 8, 16) %arg2) {
+; CHECK-LABEL: @test_two_argument_ranges_vec_false(
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
+;
+  %rval = icmp ult <2 x i32> %arg2, %arg1
+  ret <2 x i1> %rval
+}
+
+; Values' ranges do not overlap each other, so it can simplified to true.
+define <2 x i1> @test_two_argument_ranges_vec_true(<2 x i32> range(i32 1, 6) %arg1, <2 x i32> range(i32 8, 16) %arg2) {
+; CHECK-LABEL: @test_two_argument_ranges_vec_true(
+; CHECK-NEXT:    ret <2 x i1> <i1 true, i1 true>
+;
+  %rval = icmp ugt <2 x i32> %arg2, %arg1
+  ret <2 x i1> %rval
+}
+
+declare i32 @create_range1()
+declare range(i32 8, 16) i32 @create_range2()
+declare range(i32 1, 6) i32 @create_range3()
+
+; Values' ranges overlap each other, so it can not be simplified.
+define i1 @test_two_return_attribute_ranges_not_simplified() {
+; CHECK-LABEL: @test_two_return_attribute_ranges_not_simplified(
+; CHECK-NEXT:    [[ARG2:%.*]] = call range(i32 5, 10) i32 @create_range1()
+; CHECK-NEXT:    [[ARG1:%.*]] = call i32 @create_range2()
+; CHECK-NEXT:    [[RVAL:%.*]] = icmp ult i32 [[ARG1]], [[ARG2]]
+; CHECK-NEXT:    ret i1 [[RVAL]]
+;
+  %val1 = call range(i32 5, 10) i32 @create_range1()
+  %val2 = call i32 @create_range2()
+  %rval = icmp ult i32 %val2, %val1
+  ret i1 %rval
+}
+
+; Values' ranges do not overlap each other, so it can simplified to false.
+define i1 @test_two_return_attribute_ranges_one_in_call() {
+; CHECK-LABEL: @test_two_return_attribute_ranges_one_in_call(
+; CHECK-NEXT:    [[VAL1:%.*]] = call range(i32 1, 6) i32 @create_range1()
+; CHECK-NEXT:    [[ARG1:%.*]] = call i32 @create_range2()
+; CHECK-NEXT:    ret i1 false
+;
+  %val1 = call range(i32 1, 6) i32 @create_range1()
+  %val2 = call i32 @create_range2()
+  %rval = icmp ult i32 %val2, %val1
+  ret i1 %rval
+}
+
+; Values' ranges do not overlap each other, so it can simplified to false.
+define i1 @test_two_return_attribute_ranges() {
+; CHECK-LABEL: @test_two_return_attribute_ranges(
+; CHECK-NEXT:    [[VAL1:%.*]] = call i32 @create_range3()
+; CHECK-NEXT:    [[ARG1:%.*]] = call i32 @create_range2()
+; CHECK-NEXT:    ret i1 false
+;
+  %val1 = call i32 @create_range3()
+  %val2 = call i32 @create_range2()
+  %rval = icmp ult i32 %val2, %val1
+  ret i1 %rval
+}
+
+; Values' ranges do not overlap each other, so it can simplified to false.
+define i1 @test_one_return_argument_and_one_argument_range(i32 range(i32 8, 16) %arg1) {
+; CHECK-LABEL: @test_one_return_argument_and_one_argument_range(
+; CHECK-NEXT:    [[VAL1:%.*]] = call i32 @create_range3()
+; CHECK-NEXT:    ret i1 false
+;
+  %val1 = call i32 @create_range3()
+  %rval = icmp ult i32 %arg1, %val1
+  ret i1 %rval
+}
+
+; Values' ranges do not overlap each other, so it can simplified to false.
+define i1 @test_one_range_and_one_return_argument(ptr nocapture readonly %arg1) {
+; CHECK-LABEL: @test_one_range_and_one_return_argument(
+; CHECK-NEXT:    [[VAL1:%.*]] = call i32 @create_range3()
+; CHECK-NEXT:    ret i1 false
+;
+  %val1 = call i32 @create_range3()
+  %val2 = load i32, ptr %arg1, !range !6
+  %rval = icmp ult i32 %val2, %val1
+  ret i1 %rval
+}
+
 define i1 @ugt_zext(i1 %b, i8 %x) {
 ; CHECK-LABEL: @ugt_zext(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0
-- 
cgit v1.2.3


From bba4a1daff6ee09941f1369a4e56b4af95efdc5c Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Tue, 12 Mar 2024 09:47:42 +0000
Subject: [ArgPromotion] Remove incorrect TranspBlocks set for loads. (#84835)

The TranspBlocks set was used to cache aliasing decision for all
processed loads in the parent loop. This is incorrect, because each load
can access a different location, which means one load not being modified
in a block doesn't translate to another load not being modified in the
same block.

All loads access the same underlying object, so we could perhaps use a
location without size for all loads and retain the cache, but that would
mean we loose precision.

For now, just drop the cache.

Fixes https://github.com/llvm/llvm-project/issues/84807

PR: https://github.com/llvm/llvm-project/pull/84835
---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp              |  6 +-----
 .../aliasing-and-non-aliasing-loads-with-clobber.ll        | 14 +++++++-------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index e89ec353487e..3aa8ea3f5147 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -653,10 +653,6 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
   // check to see if the pointer is guaranteed to not be modified from entry of
   // the function to each of the load instructions.
 
-  // Because there could be several/many load instructions, remember which
-  // blocks we know to be transparent to the load.
-  df_iterator_default_set<BasicBlock *, 16> TranspBlocks;
-
   for (LoadInst *Load : Loads) {
     // Check to see if the load is invalidated from the start of the block to
     // the load itself.
@@ -670,7 +666,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
     // To do this, we perform a depth first search on the inverse CFG from the
     // loading block.
     for (BasicBlock *P : predecessors(BB)) {
-      for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
+      for (BasicBlock *TranspBB : inverse_depth_first(P))
         if (AAR.canBasicBlockModify(*TranspBB, Loc))
           return false;
     }
diff --git a/llvm/test/Transforms/ArgumentPromotion/aliasing-and-non-aliasing-loads-with-clobber.ll b/llvm/test/Transforms/ArgumentPromotion/aliasing-and-non-aliasing-loads-with-clobber.ll
index 69385a7ea51a..1e1669b29b0d 100644
--- a/llvm/test/Transforms/ArgumentPromotion/aliasing-and-non-aliasing-loads-with-clobber.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/aliasing-and-non-aliasing-loads-with-clobber.ll
@@ -7,17 +7,14 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:
 
 ; Test case for https://github.com/llvm/llvm-project/issues/84807.
 
-; FIXME: Currently the loads from @callee are moved to @caller, even though
-;        the store in %then may aliases to load from %q.
+; Make sure the loads from @callee are not moved to @caller, as the store
+; in %then may aliases to load from %q.
 
 define i32 @caller1(i1 %c) {
 ; CHECK-LABEL: define i32 @caller1(
 ; CHECK-SAME: i1 [[C:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[F_VAL:%.*]] = load i16, ptr @f, align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @f, i64 8
-; CHECK-NEXT:    [[F_VAL1:%.*]] = load i64, ptr [[TMP0]], align 8
-; CHECK-NEXT:    call void @callee1(i16 [[F_VAL]], i64 [[F_VAL1]], i1 [[C]])
+; CHECK-NEXT:    call void @callee1(ptr noundef nonnull @f, i1 [[C]])
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
@@ -27,13 +24,16 @@ entry:
 
 define internal void @callee1(ptr nocapture noundef readonly %q, i1 %c) {
 ; CHECK-LABEL: define internal void @callee1(
-; CHECK-SAME: i16 [[Q_0_VAL:%.*]], i64 [[Q_8_VAL:%.*]], i1 [[C:%.*]]) {
+; CHECK-SAME: ptr nocapture noundef readonly [[Q:%.*]], i1 [[C:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[C]], label [[THEN:%.*]], label [[EXIT:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    store i16 123, ptr @f, align 8
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
+; CHECK-NEXT:    [[Q_0_VAL:%.*]] = load i16, ptr [[Q]], align 8
+; CHECK-NEXT:    [[GEP_8:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 8
+; CHECK-NEXT:    [[Q_8_VAL:%.*]] = load i64, ptr [[GEP_8]], align 8
 ; CHECK-NEXT:    call void @use(i16 [[Q_0_VAL]], i64 [[Q_8_VAL]])
 ; CHECK-NEXT:    ret void
 ;
-- 
cgit v1.2.3


From 9228859c2a5aed307dc61edb4cfd6bee7b4c5949 Mon Sep 17 00:00:00 2001
From: David Stuttard <david.stuttard@amd.com>
Date: Tue, 12 Mar 2024 10:07:02 +0000
Subject: [CMake] Add tablegen job pool support (#84762)

Add the ability to set the number of tablegen jobs that can run in
parallel
similar to the LLVM_PARALLEL_[COMPILE|LINK]_JOBS options that already
exist.
---
 llvm/cmake/modules/HandleLLVMOptions.cmake | 24 +++++++++++++++++++++++-
 llvm/cmake/modules/TableGen.cmake          |  7 +++++++
 llvm/docs/CMake.rst                        |  8 ++++++++
 llvm/docs/GettingStarted.rst               |  6 +++---
 4 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index eca2962cf820..745a8354f118 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -36,7 +36,7 @@ string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO)
 # The following only works with the Ninja generator in CMake >= 3.0.
 set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
   "Define the maximum number of concurrent compilation jobs (Ninja only).")
-if(LLVM_RAM_PER_COMPILE_JOB OR LLVM_RAM_PER_LINK_JOB)
+if(LLVM_RAM_PER_COMPILE_JOB OR LLVM_RAM_PER_LINK_JOB OR LLVM_RAM_PER_TABLEGEN_JOB)
   cmake_host_system_information(RESULT available_physical_memory QUERY AVAILABLE_PHYSICAL_MEMORY)
   cmake_host_system_information(RESULT number_of_logical_cores QUERY NUMBER_OF_LOGICAL_CORES)
 endif()
@@ -86,6 +86,28 @@ elseif(LLVM_PARALLEL_LINK_JOBS)
   message(WARNING "Job pooling is only available with Ninja generators.")
 endif()
 
+set(LLVM_PARALLEL_TABLEGEN_JOBS "" CACHE STRING
+  "Define the maximum number of concurrent tablegen jobs (Ninja only).")
+if(LLVM_RAM_PER_TABLEGEN_JOB)
+  math(EXPR jobs_with_sufficient_memory "${available_physical_memory} / ${LLVM_RAM_PER_TABLEGEN_JOB}" OUTPUT_FORMAT DECIMAL)
+  if (jobs_with_sufficient_memory LESS 1)
+    set(jobs_with_sufficient_memory 1)
+  endif()
+  if (jobs_with_sufficient_memory LESS number_of_logical_cores)
+    set(LLVM_PARALLEL_TABLEGEN_JOBS "${jobs_with_sufficient_memory}")
+  else()
+    set(LLVM_PARALLEL_TABLEGEN_JOBS "${number_of_logical_cores}")
+  endif()
+endif()
+if(LLVM_PARALLEL_TABLEGEN_JOBS)
+  if(NOT CMAKE_GENERATOR MATCHES "Ninja")
+    message(WARNING "Job pooling is only available with Ninja generators.")
+  else()
+    set_property(GLOBAL APPEND PROPERTY JOB_POOLS tablegen_job_pool=${LLVM_PARALLEL_TABLEGEN_JOBS})
+    # Job pool for tablegen is set on the add_custom_command
+  endif()
+endif()
+
 if( LLVM_ENABLE_ASSERTIONS )
   # MSVC doesn't like _DEBUG on release builds. See PR 4379.
   if( NOT MSVC )
diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
index 1d18fdde2bb9..df91598c404f 100644
--- a/llvm/cmake/modules/TableGen.cmake
+++ b/llvm/cmake/modules/TableGen.cmake
@@ -125,6 +125,12 @@ function(tablegen project ofn)
   set(tablegen_exe ${${project}_TABLEGEN_EXE})
   set(tablegen_depends ${${project}_TABLEGEN_TARGET} ${tablegen_exe})
 
+  if(LLVM_PARALLEL_TABLEGEN_JOBS)
+    set(LLVM_TABLEGEN_JOB_POOL JOB_POOL tablegen_job_pool)
+  else()
+    set(LLVM_TABLEGEN_JOB_POOL "")
+  endif()
+
   add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
     COMMAND ${tablegen_exe} ${ARG_UNPARSED_ARGUMENTS} -I ${CMAKE_CURRENT_SOURCE_DIR}
     ${tblgen_includes}
@@ -139,6 +145,7 @@ function(tablegen project ofn)
       ${local_tds} ${global_tds}
     ${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
     ${LLVM_TARGET_DEPENDS}
+    ${LLVM_TABLEGEN_JOB_POOL}
     COMMENT "Building ${ofn}..."
     )
 
diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst
index 1490b38feb1e..d2f66d71d39a 100644
--- a/llvm/docs/CMake.rst
+++ b/llvm/docs/CMake.rst
@@ -762,6 +762,9 @@ enabled sub-projects. Nearly all of these variable names begin with
 **LLVM_PARALLEL_LINK_JOBS**:STRING
   Define the maximum number of concurrent link jobs.
 
+**LLVM_PARALLEL_TABLEGEN_JOBS**:STRING
+  Define the maximum number of concurrent tablegen jobs.
+
 **LLVM_RAM_PER_COMPILE_JOB**:STRING
   Calculates the amount of Ninja compile jobs according to available resources.
   Value has to be in MB, overwrites LLVM_PARALLEL_COMPILE_JOBS. Compile jobs 
@@ -775,6 +778,11 @@ enabled sub-projects. Nearly all of these variable names begin with
   to be sure its not terminated in your memory restricted environment. On ELF
   platforms also consider ``LLVM_USE_SPLIT_DWARF`` in Debug build.
 
+**LLVM_RAM_PER_TABLEGEN_JOB**:STRING
+  Calculates the amount of Ninja tablegen jobs according to available resources.
+  Value has to be in MB, overwrites LLVM_PARALLEL_TABLEGEN_JOBS. Tablegen jobs
+  will be between one and amount of logical cores.
+
 **LLVM_PROFDATA_FILE**:PATH
   Path to a profdata file to pass into clang's -fprofile-instr-use flag. This
   can only be specified if you're building with clang.
diff --git a/llvm/docs/GettingStarted.rst b/llvm/docs/GettingStarted.rst
index 7634199babba..705f6427d9ed 100644
--- a/llvm/docs/GettingStarted.rst
+++ b/llvm/docs/GettingStarted.rst
@@ -90,11 +90,11 @@ Getting the Source Code and Building LLVM
        is installed on your system. This can dramatically speed up link times
        if the default linker is slow.
 
-     * ``-DLLVM_PARALLEL_{COMPILE,LINK}_JOBS=N`` --- Limit the number of
-       compile/link jobs running in parallel at the same time. This is
+     * ``-DLLVM_PARALLEL_{COMPILE,LINK,TABLEGEN}_JOBS=N`` --- Limit the number of
+       compile/link/tablegen jobs running in parallel at the same time. This is
        especially important for linking since linking can use lots of memory. If
        you run into memory issues building LLVM, try setting this to limit the
-       maximum number of compile/link jobs running at the same time.
+       maximum number of compile/link/tablegen jobs running at the same time.
 
    * ``cmake --build build [--target <target>]`` or the build system specified
      above directly.
-- 
cgit v1.2.3


From ce1fd9281707c2163728085d126ff83041e1db51 Mon Sep 17 00:00:00 2001
From: Danial Klimkin <dklimkin@google.com>
Date: Tue, 12 Mar 2024 11:19:48 +0100
Subject: Update test past bdbad0d07bb600301cb324e87a6be37ca4af591a (#84889)

---
 .../data-formatter/builtin-formats/TestBuiltinFormats.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py b/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py
index 8c3bdabeaac1..4d6f44db0195 100644
--- a/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py
+++ b/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py
@@ -308,5 +308,5 @@ class TestCase(TestBase):
     @no_debug_info_test
     def test_instruction(self):
         self.assertIn(
-            "  addq   0xa(%rdi), %r8\n", self.getFormatted("instruction", "0x0a47034c")
+            "= addq   0xa(%rdi), %r8\n", self.getFormatted("instruction", "0x0a47034c")
         )
-- 
cgit v1.2.3


From 9997e0397156ff7e01aecbd17bdeb7bfe5fb15b0 Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams@sony.com>
Date: Tue, 12 Mar 2024 10:25:58 +0000
Subject: [RemoveDIs] Update DIBuilder to conditionally insert DbgRecords
 (#84739)

Have DIBuilder conditionally insert either debug intrinsics or DbgRecord
depending on the module's IsNewDbgInfoFormat flag. The insertion methods
now return a `DbgInstPtr` (a `PointerUnion<Instruction *, DbgRecord
*>`).

Add a unittest for both modes (I couldn't find an existing test testing
insertion behaviours specifically).

This patch changes the existing assumption that DbgRecords are only ever
inserted if there's an instruction to insert-before because clang
currently inserts debug intrinsics while CodeGening (like any other
instruction) meaning it'll try inserting to the end of a block without a
terminator. We already have machinery in place to maintain the
DbgRecords when a terminator is removed - these become "trailing
DbgRecords" which are re-attached when a new instruction is inserted.
All I've done is allow this state to occur while inserting DbgRecords
too, i.e., it's not only removing terminators that causes this valid
transient state, but inserting DbgRecords into incomplete blocks too.

The C API will be updated in follow up patches.

---

Note: this doesn't mean clang is emitting DbgRecords yet, because the
modules it creates are still always in the old debug mode. That will
come in a future patch.
---
 llvm/include/llvm/IR/DIBuilder.h                   |  73 +++++-----
 llvm/lib/IR/BasicBlock.cpp                         |  13 +-
 llvm/lib/IR/DIBuilder.cpp                          | 131 ++++++++++++------
 llvm/lib/IR/DebugInfo.cpp                          |  89 ++++++------
 llvm/lib/IR/Instruction.cpp                        |   3 +-
 llvm/lib/Transforms/Scalar/SROA.cpp                |  47 +++----
 llvm/lib/Transforms/Utils/Local.cpp                |  12 +-
 .../Transforms/Utils/PromoteMemoryToRegister.cpp   |  25 ++--
 llvm/unittests/IR/IRBuilderTest.cpp                | 152 ++++++++++++++++++---
 9 files changed, 358 insertions(+), 187 deletions(-)

diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h
index edec161b3971..94af17af8160 100644
--- a/llvm/include/llvm/IR/DIBuilder.h
+++ b/llvm/include/llvm/IR/DIBuilder.h
@@ -38,6 +38,9 @@ namespace llvm {
   class Module;
   class Value;
   class DbgAssignIntrinsic;
+  class DbgRecord;
+
+  using DbgInstPtr = PointerUnion<Instruction *, DbgRecord *>;
 
   class DIBuilder {
     Module &M;
@@ -90,13 +93,17 @@ namespace llvm {
     void trackIfUnresolved(MDNode *N);
 
     /// Internal helper for insertDeclare.
-    Instruction *insertDeclare(llvm::Value *Storage, DILocalVariable *VarInfo,
-                               DIExpression *Expr, const DILocation *DL,
-                               BasicBlock *InsertBB, Instruction *InsertBefore);
+    DbgInstPtr insertDeclare(llvm::Value *Storage, DILocalVariable *VarInfo,
+                             DIExpression *Expr, const DILocation *DL,
+                             BasicBlock *InsertBB, Instruction *InsertBefore);
 
     /// Internal helper for insertLabel.
-    Instruction *insertLabel(DILabel *LabelInfo, const DILocation *DL,
-                             BasicBlock *InsertBB, Instruction *InsertBefore);
+    DbgInstPtr insertLabel(DILabel *LabelInfo, const DILocation *DL,
+                           BasicBlock *InsertBB, Instruction *InsertBefore);
+
+    /// Internal helper. Track metadata if untracked and insert \p DPV.
+    void insertDPValue(DPValue *DPV, BasicBlock *InsertBB,
+                       Instruction *InsertBefore, bool InsertAtHead = false);
 
     /// Internal helper with common code used by insertDbg{Value,Addr}Intrinsic.
     Instruction *insertDbgIntrinsic(llvm::Function *Intrinsic, llvm::Value *Val,
@@ -106,10 +113,11 @@ namespace llvm {
                                     Instruction *InsertBefore);
 
     /// Internal helper for insertDbgValueIntrinsic.
-    Instruction *
-    insertDbgValueIntrinsic(llvm::Value *Val, DILocalVariable *VarInfo,
-                            DIExpression *Expr, const DILocation *DL,
-                            BasicBlock *InsertBB, Instruction *InsertBefore);
+    DbgInstPtr insertDbgValueIntrinsic(llvm::Value *Val,
+                                       DILocalVariable *VarInfo,
+                                       DIExpression *Expr, const DILocation *DL,
+                                       BasicBlock *InsertBB,
+                                       Instruction *InsertBefore);
 
   public:
     /// Construct a builder for a module.
@@ -921,9 +929,9 @@ namespace llvm {
     /// \param Expr        A complex location expression.
     /// \param DL          Debug info location.
     /// \param InsertAtEnd Location for the new intrinsic.
-    Instruction *insertDeclare(llvm::Value *Storage, DILocalVariable *VarInfo,
-                               DIExpression *Expr, const DILocation *DL,
-                               BasicBlock *InsertAtEnd);
+    DbgInstPtr insertDeclare(llvm::Value *Storage, DILocalVariable *VarInfo,
+                             DIExpression *Expr, const DILocation *DL,
+                             BasicBlock *InsertAtEnd);
 
     /// Insert a new llvm.dbg.assign intrinsic call.
     /// \param LinkedInstr   Instruction with a DIAssignID to link with the new
@@ -939,11 +947,10 @@ namespace llvm {
     /// \param DL            Debug info location, usually: (line: 0,
     ///                      column: 0, scope: var-decl-scope). See
     ///                      getDebugValueLoc.
-    DbgAssignIntrinsic *insertDbgAssign(Instruction *LinkedInstr, Value *Val,
-                                        DILocalVariable *SrcVar,
-                                        DIExpression *ValExpr, Value *Addr,
-                                        DIExpression *AddrExpr,
-                                        const DILocation *DL);
+    DbgInstPtr insertDbgAssign(Instruction *LinkedInstr, Value *Val,
+                               DILocalVariable *SrcVar, DIExpression *ValExpr,
+                               Value *Addr, DIExpression *AddrExpr,
+                               const DILocation *DL);
 
     /// Insert a new llvm.dbg.declare intrinsic call.
     /// \param Storage      llvm::Value of the variable
@@ -951,23 +958,23 @@ namespace llvm {
     /// \param Expr         A complex location expression.
     /// \param DL           Debug info location.
     /// \param InsertBefore Location for the new intrinsic.
-    Instruction *insertDeclare(llvm::Value *Storage, DILocalVariable *VarInfo,
-                               DIExpression *Expr, const DILocation *DL,
-                               Instruction *InsertBefore);
+    DbgInstPtr insertDeclare(llvm::Value *Storage, DILocalVariable *VarInfo,
+                             DIExpression *Expr, const DILocation *DL,
+                             Instruction *InsertBefore);
 
     /// Insert a new llvm.dbg.label intrinsic call.
     /// \param LabelInfo    Label's debug info descriptor.
     /// \param DL           Debug info location.
     /// \param InsertBefore Location for the new intrinsic.
-    Instruction *insertLabel(DILabel *LabelInfo, const DILocation *DL,
-                             Instruction *InsertBefore);
+    DbgInstPtr insertLabel(DILabel *LabelInfo, const DILocation *DL,
+                           Instruction *InsertBefore);
 
     /// Insert a new llvm.dbg.label intrinsic call.
     /// \param LabelInfo    Label's debug info descriptor.
     /// \param DL           Debug info location.
     /// \param InsertAtEnd Location for the new intrinsic.
-    Instruction *insertLabel(DILabel *LabelInfo, const DILocation *DL,
-                             BasicBlock *InsertAtEnd);
+    DbgInstPtr insertLabel(DILabel *LabelInfo, const DILocation *DL,
+                           BasicBlock *InsertAtEnd);
 
     /// Insert a new llvm.dbg.value intrinsic call.
     /// \param Val          llvm::Value of the variable
@@ -975,11 +982,10 @@ namespace llvm {
     /// \param Expr         A complex location expression.
     /// \param DL           Debug info location.
     /// \param InsertAtEnd Location for the new intrinsic.
-    Instruction *insertDbgValueIntrinsic(llvm::Value *Val,
-                                         DILocalVariable *VarInfo,
-                                         DIExpression *Expr,
-                                         const DILocation *DL,
-                                         BasicBlock *InsertAtEnd);
+    DbgInstPtr insertDbgValueIntrinsic(llvm::Value *Val,
+                                       DILocalVariable *VarInfo,
+                                       DIExpression *Expr, const DILocation *DL,
+                                       BasicBlock *InsertAtEnd);
 
     /// Insert a new llvm.dbg.value intrinsic call.
     /// \param Val          llvm::Value of the variable
@@ -987,11 +993,10 @@ namespace llvm {
     /// \param Expr         A complex location expression.
     /// \param DL           Debug info location.
     /// \param InsertBefore Location for the new intrinsic.
-    Instruction *insertDbgValueIntrinsic(llvm::Value *Val,
-                                         DILocalVariable *VarInfo,
-                                         DIExpression *Expr,
-                                         const DILocation *DL,
-                                         Instruction *InsertBefore);
+    DbgInstPtr insertDbgValueIntrinsic(llvm::Value *Val,
+                                       DILocalVariable *VarInfo,
+                                       DIExpression *Expr, const DILocation *DL,
+                                       Instruction *InsertBefore);
 
     /// Replace the vtable holder in the given type.
     ///
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index c188d2f912d1..673e2f68249c 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -754,8 +754,6 @@ void BasicBlock::spliceDebugInfoEmptyBlock(BasicBlock::iterator Dest,
   // occur when a block is optimised away and the terminator has been moved
   // somewhere else.
   if (Src->empty()) {
-    assert(Dest != end() &&
-           "Transferring trailing DPValues to another trailing position");
     DPMarker *SrcTrailingDPValues = Src->getTrailingDPValues();
     if (!SrcTrailingDPValues)
       return;
@@ -1040,15 +1038,10 @@ void BasicBlock::insertDPValueAfter(DbgRecord *DPV, Instruction *I) {
 
 void BasicBlock::insertDPValueBefore(DbgRecord *DPV,
                                      InstListType::iterator Where) {
-  // We should never directly insert at the end of the block, new DPValues
-  // shouldn't be generated at times when there's no terminator.
-  assert(Where != end());
-  assert(Where->getParent() == this);
-  if (!Where->DbgMarker)
-    createMarker(Where);
+  assert(Where == end() || Where->getParent() == this);
   bool InsertAtHead = Where.getHeadBit();
-  createMarker(&*Where);
-  Where->DbgMarker->insertDPValue(DPV, InsertAtHead);
+  DPMarker *M = createMarker(Where);
+  M->insertDPValue(DPV, InsertAtHead);
 }
 
 DPMarker *BasicBlock::getNextMarker(Instruction *I) {
diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp
index 62efaba02534..c0643f63c972 100644
--- a/llvm/lib/IR/DIBuilder.cpp
+++ b/llvm/lib/IR/DIBuilder.cpp
@@ -925,35 +925,47 @@ DILexicalBlock *DIBuilder::createLexicalBlock(DIScope *Scope, DIFile *File,
                                      File, Line, Col);
 }
 
-Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
-                                      DIExpression *Expr, const DILocation *DL,
-                                      Instruction *InsertBefore) {
+DbgInstPtr DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
+                                    DIExpression *Expr, const DILocation *DL,
+                                    Instruction *InsertBefore) {
   return insertDeclare(Storage, VarInfo, Expr, DL, InsertBefore->getParent(),
                        InsertBefore);
 }
 
-Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
-                                      DIExpression *Expr, const DILocation *DL,
-                                      BasicBlock *InsertAtEnd) {
+DbgInstPtr DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
+                                    DIExpression *Expr, const DILocation *DL,
+                                    BasicBlock *InsertAtEnd) {
   // If this block already has a terminator then insert this intrinsic before
   // the terminator. Otherwise, put it at the end of the block.
   Instruction *InsertBefore = InsertAtEnd->getTerminator();
   return insertDeclare(Storage, VarInfo, Expr, DL, InsertAtEnd, InsertBefore);
 }
 
-DbgAssignIntrinsic *
-DIBuilder::insertDbgAssign(Instruction *LinkedInstr, Value *Val,
-                           DILocalVariable *SrcVar, DIExpression *ValExpr,
-                           Value *Addr, DIExpression *AddrExpr,
-                           const DILocation *DL) {
+DbgInstPtr DIBuilder::insertDbgAssign(Instruction *LinkedInstr, Value *Val,
+                                      DILocalVariable *SrcVar,
+                                      DIExpression *ValExpr, Value *Addr,
+                                      DIExpression *AddrExpr,
+                                      const DILocation *DL) {
+  auto *Link = cast_or_null<DIAssignID>(
+      LinkedInstr->getMetadata(LLVMContext::MD_DIAssignID));
+  assert(Link && "Linked instruction must have DIAssign metadata attached");
+
+  if (M.IsNewDbgInfoFormat) {
+    DPValue *DPV = DPValue::createDPVAssign(Val, SrcVar, ValExpr, Link, Addr,
+                                            AddrExpr, DL);
+    BasicBlock *InsertBB = LinkedInstr->getParent();
+    // Insert after LinkedInstr.
+    BasicBlock::iterator NextIt = std::next(LinkedInstr->getIterator());
+    Instruction *InsertBefore = NextIt == InsertBB->end() ? nullptr : &*NextIt;
+    insertDPValue(DPV, InsertBB, InsertBefore, true);
+    return DPV;
+  }
+
   LLVMContext &Ctx = LinkedInstr->getContext();
   Module *M = LinkedInstr->getModule();
   if (!AssignFn)
     AssignFn = Intrinsic::getDeclaration(M, Intrinsic::dbg_assign);
 
-  auto *Link = LinkedInstr->getMetadata(LLVMContext::MD_DIAssignID);
-  assert(Link && "Linked instruction must have DIAssign metadata attached");
-
   std::array<Value *, 6> Args = {
       MetadataAsValue::get(Ctx, ValueAsMetadata::get(Val)),
       MetadataAsValue::get(Ctx, SrcVar),
@@ -971,35 +983,36 @@ DIBuilder::insertDbgAssign(Instruction *LinkedInstr, Value *Val,
   return DVI;
 }
 
-Instruction *DIBuilder::insertLabel(DILabel *LabelInfo, const DILocation *DL,
-                                    Instruction *InsertBefore) {
+DbgInstPtr DIBuilder::insertLabel(DILabel *LabelInfo, const DILocation *DL,
+                                  Instruction *InsertBefore) {
   return insertLabel(LabelInfo, DL,
                      InsertBefore ? InsertBefore->getParent() : nullptr,
                      InsertBefore);
 }
 
-Instruction *DIBuilder::insertLabel(DILabel *LabelInfo, const DILocation *DL,
-                                    BasicBlock *InsertAtEnd) {
+DbgInstPtr DIBuilder::insertLabel(DILabel *LabelInfo, const DILocation *DL,
+                                  BasicBlock *InsertAtEnd) {
   return insertLabel(LabelInfo, DL, InsertAtEnd, nullptr);
 }
 
-Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V,
-                                                DILocalVariable *VarInfo,
-                                                DIExpression *Expr,
-                                                const DILocation *DL,
-                                                Instruction *InsertBefore) {
-  Instruction *DVI = insertDbgValueIntrinsic(
+DbgInstPtr DIBuilder::insertDbgValueIntrinsic(Value *V,
+                                              DILocalVariable *VarInfo,
+                                              DIExpression *Expr,
+                                              const DILocation *DL,
+                                              Instruction *InsertBefore) {
+  DbgInstPtr DVI = insertDbgValueIntrinsic(
       V, VarInfo, Expr, DL, InsertBefore ? InsertBefore->getParent() : nullptr,
       InsertBefore);
-  cast<CallInst>(DVI)->setTailCall();
+  if (DVI.is<Instruction *>())
+    cast<CallInst>(DVI.get<Instruction *>())->setTailCall();
   return DVI;
 }
 
-Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V,
-                                                DILocalVariable *VarInfo,
-                                                DIExpression *Expr,
-                                                const DILocation *DL,
-                                                BasicBlock *InsertAtEnd) {
+DbgInstPtr DIBuilder::insertDbgValueIntrinsic(Value *V,
+                                              DILocalVariable *VarInfo,
+                                              DIExpression *Expr,
+                                              const DILocation *DL,
+                                              BasicBlock *InsertAtEnd) {
   return insertDbgValueIntrinsic(V, VarInfo, Expr, DL, InsertAtEnd, nullptr);
 }
 
@@ -1023,24 +1036,37 @@ static Function *getDeclareIntrin(Module &M) {
   return Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
 }
 
-Instruction *DIBuilder::insertDbgValueIntrinsic(
+DbgInstPtr DIBuilder::insertDbgValueIntrinsic(
     llvm::Value *Val, DILocalVariable *VarInfo, DIExpression *Expr,
     const DILocation *DL, BasicBlock *InsertBB, Instruction *InsertBefore) {
+  if (M.IsNewDbgInfoFormat) {
+    DPValue *DPV = DPValue::createDPValue(Val, VarInfo, Expr, DL);
+    insertDPValue(DPV, InsertBB, InsertBefore);
+    return DPV;
+  }
+
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
   return insertDbgIntrinsic(ValueFn, Val, VarInfo, Expr, DL, InsertBB,
                             InsertBefore);
 }
 
-Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
-                                      DIExpression *Expr, const DILocation *DL,
-                                      BasicBlock *InsertBB,
-                                      Instruction *InsertBefore) {
+DbgInstPtr DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
+                                    DIExpression *Expr, const DILocation *DL,
+                                    BasicBlock *InsertBB,
+                                    Instruction *InsertBefore) {
   assert(VarInfo && "empty or invalid DILocalVariable* passed to dbg.declare");
   assert(DL && "Expected debug loc");
   assert(DL->getScope()->getSubprogram() ==
              VarInfo->getScope()->getSubprogram() &&
          "Expected matching subprograms");
+
+  if (M.IsNewDbgInfoFormat) {
+    DPValue *DPV = DPValue::createDPVDeclare(Storage, VarInfo, Expr, DL);
+    insertDPValue(DPV, InsertBB, InsertBefore);
+    return DPV;
+  }
+
   if (!DeclareFn)
     DeclareFn = getDeclareIntrin(M);
 
@@ -1055,6 +1081,23 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
   return B.CreateCall(DeclareFn, Args);
 }
 
+void DIBuilder::insertDPValue(DPValue *DPV, BasicBlock *InsertBB,
+                              Instruction *InsertBefore, bool InsertAtHead) {
+  assert(InsertBefore || InsertBB);
+  trackIfUnresolved(DPV->getVariable());
+  trackIfUnresolved(DPV->getExpression());
+  if (DPV->isDbgAssign())
+    trackIfUnresolved(DPV->getAddressExpression());
+
+  BasicBlock::iterator InsertPt;
+  if (InsertBB && InsertBefore)
+    InsertPt = InsertBefore->getIterator();
+  else if (InsertBB)
+    InsertPt = InsertBB->end();
+  InsertPt.setHeadBit(InsertAtHead);
+  InsertBB->insertDPValueBefore(DPV, InsertPt);
+}
+
 Instruction *DIBuilder::insertDbgIntrinsic(llvm::Function *IntrinsicFn,
                                            Value *V, DILocalVariable *VarInfo,
                                            DIExpression *Expr,
@@ -1081,18 +1124,28 @@ Instruction *DIBuilder::insertDbgIntrinsic(llvm::Function *IntrinsicFn,
   return B.CreateCall(IntrinsicFn, Args);
 }
 
-Instruction *DIBuilder::insertLabel(DILabel *LabelInfo, const DILocation *DL,
-                                    BasicBlock *InsertBB,
-                                    Instruction *InsertBefore) {
+DbgInstPtr DIBuilder::insertLabel(DILabel *LabelInfo, const DILocation *DL,
+                                  BasicBlock *InsertBB,
+                                  Instruction *InsertBefore) {
   assert(LabelInfo && "empty or invalid DILabel* passed to dbg.label");
   assert(DL && "Expected debug loc");
   assert(DL->getScope()->getSubprogram() ==
              LabelInfo->getScope()->getSubprogram() &&
          "Expected matching subprograms");
+
+  trackIfUnresolved(LabelInfo);
+  if (M.IsNewDbgInfoFormat) {
+    DPLabel *DPL = new DPLabel(LabelInfo, DL);
+    if (InsertBB && InsertBefore)
+      InsertBB->insertDPValueBefore(DPL, InsertBefore->getIterator());
+    else if (InsertBB)
+      InsertBB->insertDPValueBefore(DPL, InsertBB->end());
+    return DPL;
+  }
+
   if (!LabelFn)
     LabelFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_label);
 
-  trackIfUnresolved(LabelInfo);
   Value *Args[] = {MetadataAsValue::get(VMContext, LabelInfo)};
 
   IRBuilder<> B(DL->getContext());
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 1f3ff2246a44..68fd244e2569 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -1663,43 +1663,47 @@ LLVMValueRef
 LLVMDIBuilderInsertDeclareBefore(LLVMDIBuilderRef Builder, LLVMValueRef Storage,
                                  LLVMMetadataRef VarInfo, LLVMMetadataRef Expr,
                                  LLVMMetadataRef DL, LLVMValueRef Instr) {
-  return wrap(unwrap(Builder)->insertDeclare(
-                  unwrap(Storage), unwrap<DILocalVariable>(VarInfo),
-                  unwrap<DIExpression>(Expr), unwrap<DILocation>(DL),
-                  unwrap<Instruction>(Instr)));
-}
-
-LLVMValueRef LLVMDIBuilderInsertDeclareAtEnd(
-    LLVMDIBuilderRef Builder, LLVMValueRef Storage, LLVMMetadataRef VarInfo,
-    LLVMMetadataRef Expr, LLVMMetadataRef DL, LLVMBasicBlockRef Block) {
-  return wrap(unwrap(Builder)->insertDeclare(
-                  unwrap(Storage), unwrap<DILocalVariable>(VarInfo),
-                  unwrap<DIExpression>(Expr), unwrap<DILocation>(DL),
-                  unwrap(Block)));
-}
-
-LLVMValueRef LLVMDIBuilderInsertDbgValueBefore(LLVMDIBuilderRef Builder,
-                                               LLVMValueRef Val,
-                                               LLVMMetadataRef VarInfo,
-                                               LLVMMetadataRef Expr,
-                                               LLVMMetadataRef DebugLoc,
-                                               LLVMValueRef Instr) {
-  return wrap(unwrap(Builder)->insertDbgValueIntrinsic(
-                  unwrap(Val), unwrap<DILocalVariable>(VarInfo),
-                  unwrap<DIExpression>(Expr), unwrap<DILocation>(DebugLoc),
-                  unwrap<Instruction>(Instr)));
-}
-
-LLVMValueRef LLVMDIBuilderInsertDbgValueAtEnd(LLVMDIBuilderRef Builder,
-                                              LLVMValueRef Val,
-                                              LLVMMetadataRef VarInfo,
-                                              LLVMMetadataRef Expr,
-                                              LLVMMetadataRef DebugLoc,
-                                              LLVMBasicBlockRef Block) {
-  return wrap(unwrap(Builder)->insertDbgValueIntrinsic(
-                  unwrap(Val), unwrap<DILocalVariable>(VarInfo),
-                  unwrap<DIExpression>(Expr), unwrap<DILocation>(DebugLoc),
-                  unwrap(Block)));
+  DbgInstPtr DbgInst = unwrap(Builder)->insertDeclare(
+      unwrap(Storage), unwrap<DILocalVariable>(VarInfo),
+      unwrap<DIExpression>(Expr), unwrap<DILocation>(DL),
+      unwrap<Instruction>(Instr));
+  assert(isa<Instruction *>(DbgInst) &&
+         "Inserted a DbgRecord into function using old debug info mode");
+  return wrap(cast<Instruction *>(DbgInst));
+}
+
+LLVMValueRef
+LLVMDIBuilderInsertDeclareAtEnd(LLVMDIBuilderRef Builder, LLVMValueRef Storage,
+                                LLVMMetadataRef VarInfo, LLVMMetadataRef Expr,
+                                LLVMMetadataRef DL, LLVMBasicBlockRef Block) {
+  DbgInstPtr DbgInst = unwrap(Builder)->insertDeclare(
+      unwrap(Storage), unwrap<DILocalVariable>(VarInfo),
+      unwrap<DIExpression>(Expr), unwrap<DILocation>(DL), unwrap(Block));
+  assert(isa<Instruction *>(DbgInst) &&
+         "Inserted a DbgRecord into function using old debug info mode");
+  return wrap(cast<Instruction *>(DbgInst));
+}
+
+LLVMValueRef LLVMDIBuilderInsertDbgValueBefore(
+    LLVMDIBuilderRef Builder, LLVMValueRef Val, LLVMMetadataRef VarInfo,
+    LLVMMetadataRef Expr, LLVMMetadataRef DebugLoc, LLVMValueRef Instr) {
+  DbgInstPtr DbgInst = unwrap(Builder)->insertDbgValueIntrinsic(
+      unwrap(Val), unwrap<DILocalVariable>(VarInfo), unwrap<DIExpression>(Expr),
+      unwrap<DILocation>(DebugLoc), unwrap<Instruction>(Instr));
+  assert(isa<Instruction *>(DbgInst) &&
+         "Inserted a DbgRecord into function using old debug info mode");
+  return wrap(cast<Instruction *>(DbgInst));
+}
+
+LLVMValueRef LLVMDIBuilderInsertDbgValueAtEnd(
+    LLVMDIBuilderRef Builder, LLVMValueRef Val, LLVMMetadataRef VarInfo,
+    LLVMMetadataRef Expr, LLVMMetadataRef DebugLoc, LLVMBasicBlockRef Block) {
+  DbgInstPtr DbgInst = unwrap(Builder)->insertDbgValueIntrinsic(
+      unwrap(Val), unwrap<DILocalVariable>(VarInfo), unwrap<DIExpression>(Expr),
+      unwrap<DILocation>(DebugLoc), unwrap(Block));
+  assert(isa<Instruction *>(DbgInst) &&
+         "Inserted a DbgRecord into function using old debug info mode");
+  return wrap(cast<Instruction *>(DbgInst));
 }
 
 LLVMMetadataRef LLVMDIBuilderCreateAutoVariable(
@@ -2115,10 +2119,15 @@ static void emitDbgAssign(AssignmentInfo Info, Value *Val, Value *Dest,
     LLVM_DEBUG(if (Assign) errs() << " > INSERT: " << *Assign << "\n");
     return;
   }
-  auto *Assign = DIB.insertDbgAssign(&StoreLikeInst, Val, VarRec.Var, Expr,
-                                     Dest, AddrExpr, VarRec.DL);
+  auto Assign = DIB.insertDbgAssign(&StoreLikeInst, Val, VarRec.Var, Expr, Dest,
+                                    AddrExpr, VarRec.DL);
   (void)Assign;
-  LLVM_DEBUG(if (Assign) errs() << " > INSERT: " << *Assign << "\n");
+  LLVM_DEBUG(if (!Assign.isNull()) {
+    if (Assign.is<DbgRecord *>())
+      errs() << " > INSERT: " << *Assign.get<DbgRecord *>() << "\n";
+    else
+      errs() << " > INSERT: " << *Assign.get<Instruction *>() << "\n";
+  });
 }
 
 #undef DEBUG_TYPE // Silence redefinition warning (from ConstantsContext.h).
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index e863ef3eb8d6..6b8c6e0c85ed 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -166,7 +166,8 @@ void Instruction::insertBefore(BasicBlock &BB,
   }
 
   // If we're inserting a terminator, check if we need to flush out
-  // TrailingDPValues.
+  // TrailingDPValues. Inserting instructions at the end of an incomplete
+  // block is handled by the code block above.
   if (isTerminator())
     getParent()->flushTerminatorDbgValues();
 }
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index e11b984f13bb..190fee11618b 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -324,23 +324,16 @@ static DebugVariable getAggregateVariable(DPValue *DPV) {
                        DPV->getDebugLoc().getInlinedAt());
 }
 
-static DPValue *createLinkedAssign(DPValue *, DIBuilder &DIB,
-                                   Instruction *LinkedInstr, Value *NewValue,
-                                   DILocalVariable *Variable,
-                                   DIExpression *Expression, Value *Address,
-                                   DIExpression *AddressExpression,
-                                   const DILocation *DI) {
-  (void)DIB;
-  return DPValue::createLinkedDPVAssign(LinkedInstr, NewValue, Variable,
-                                        Expression, Address, AddressExpression,
-                                        DI);
+/// Helpers for handling new and old debug info modes in migrateDebugInfo.
+/// These overloads unwrap a DbgInstPtr {Instruction* | DbgRecord*} union based
+/// on the \p Unused parameter type.
+DPValue *UnwrapDbgInstPtr(DbgInstPtr P, DPValue *Unused) {
+  (void)Unused;
+  return static_cast<DPValue *>(cast<DbgRecord *>(P));
 }
-static DbgAssignIntrinsic *createLinkedAssign(
-    DbgAssignIntrinsic *, DIBuilder &DIB, Instruction *LinkedInstr,
-    Value *NewValue, DILocalVariable *Variable, DIExpression *Expression,
-    Value *Address, DIExpression *AddressExpression, const DILocation *DI) {
-  return DIB.insertDbgAssign(LinkedInstr, NewValue, Variable, Expression,
-                             Address, AddressExpression, DI);
+DbgAssignIntrinsic *UnwrapDbgInstPtr(DbgInstPtr P, DbgAssignIntrinsic *Unused) {
+  (void)Unused;
+  return static_cast<DbgAssignIntrinsic *>(cast<Instruction *>(P));
 }
 
 /// Find linked dbg.assign and generate a new one with the correct
@@ -398,7 +391,7 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
   DIBuilder DIB(*OldInst->getModule(), /*AllowUnresolved*/ false);
   assert(OldAlloca->isStaticAlloca());
 
-  auto MigrateDbgAssign = [&](auto DbgAssign) {
+  auto MigrateDbgAssign = [&](auto *DbgAssign) {
     LLVM_DEBUG(dbgs() << "      existing dbg.assign is: " << *DbgAssign
                       << "\n");
     auto *Expr = DbgAssign->getExpression();
@@ -452,10 +445,12 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
     }
 
     ::Value *NewValue = Value ? Value : DbgAssign->getValue();
-    auto *NewAssign = createLinkedAssign(
-        DbgAssign, DIB, Inst, NewValue, DbgAssign->getVariable(), Expr, Dest,
-        DIExpression::get(Expr->getContext(), std::nullopt),
-        DbgAssign->getDebugLoc());
+    auto *NewAssign = UnwrapDbgInstPtr(
+        DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
+                            Dest,
+                            DIExpression::get(Expr->getContext(), std::nullopt),
+                            DbgAssign->getDebugLoc()),
+        DbgAssign);
 
     // If we've updated the value but the original dbg.assign has an arglist
     // then kill it now - we can't use the requested new value.
@@ -5031,9 +5026,11 @@ static void insertNewDbgInst(DIBuilder &DIB, DbgAssignIntrinsic *Orig,
     NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
                          DIAssignID::getDistinct(NewAddr->getContext()));
   }
-  auto *NewAssign = DIB.insertDbgAssign(
-      NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
-      Orig->getAddressExpression(), Orig->getDebugLoc());
+  Instruction *NewAssign =
+      DIB.insertDbgAssign(NewAddr, Orig->getValue(), Orig->getVariable(),
+                          NewFragmentExpr, NewAddr,
+                          Orig->getAddressExpression(), Orig->getDebugLoc())
+          .get<Instruction *>();
   LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign << "\n");
   (void)NewAssign;
 }
@@ -5052,7 +5049,7 @@ static void insertNewDbgInst(DIBuilder &DIB, DPValue *Orig, AllocaInst *NewAddr,
     NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
                          DIAssignID::getDistinct(NewAddr->getContext()));
   }
-  auto *NewAssign = DPValue::createLinkedDPVAssign(
+  DPValue *NewAssign = DPValue::createLinkedDPVAssign(
       NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
       Orig->getAddressExpression(), Orig->getDebugLoc());
   LLVM_DEBUG(dbgs() << "Created new DPVAssign: " << *NewAssign << "\n");
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index d3bb89075015..a44536e34c92 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1649,9 +1649,9 @@ static void insertDbgValueOrDPValue(DIBuilder &Builder, Value *DV,
                                     const DebugLoc &NewLoc,
                                     BasicBlock::iterator Instr) {
   if (!UseNewDbgInfoFormat) {
-    auto *DbgVal = Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc,
-                                                   (Instruction *)nullptr);
-    DbgVal->insertBefore(Instr);
+    auto DbgVal = Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc,
+                                                  (Instruction *)nullptr);
+    DbgVal.get<Instruction *>()->insertBefore(Instr);
   } else {
     // RemoveDIs: if we're using the new debug-info format, allocate a
     // DPValue directly instead of a dbg.value intrinsic.
@@ -1667,9 +1667,9 @@ static void insertDbgValueOrDPValueAfter(DIBuilder &Builder, Value *DV,
                                          const DebugLoc &NewLoc,
                                          BasicBlock::iterator Instr) {
   if (!UseNewDbgInfoFormat) {
-    auto *DbgVal = Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc,
-                                                   (Instruction *)nullptr);
-    DbgVal->insertAfter(&*Instr);
+    auto DbgVal = Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc,
+                                                  (Instruction *)nullptr);
+    DbgVal.get<Instruction *>()->insertAfter(&*Instr);
   } else {
     // RemoveDIs: if we're using the new debug-info format, allocate a
     // DPValue directly instead of a dbg.value intrinsic.
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 88b05aab8db4..b462803bad38 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -101,21 +101,20 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
 
 namespace {
 
-static DPValue *createDebugValue(DIBuilder &DIB, Value *NewValue,
-                                 DILocalVariable *Variable,
-                                 DIExpression *Expression, const DILocation *DI,
-                                 DPValue *InsertBefore) {
+static void createDebugValue(DIBuilder &DIB, Value *NewValue,
+                             DILocalVariable *Variable,
+                             DIExpression *Expression, const DILocation *DI,
+                             DPValue *InsertBefore) {
+  // FIXME: Merge these two functions now that DIBuilder supports DPValues.
+  // We neeed the API to accept DPValues as an insert point for that to work.
   (void)DIB;
-  return DPValue::createDPValue(NewValue, Variable, Expression, DI,
-                                *InsertBefore);
+  DPValue::createDPValue(NewValue, Variable, Expression, DI, *InsertBefore);
 }
-static DbgValueInst *createDebugValue(DIBuilder &DIB, Value *NewValue,
-                                      DILocalVariable *Variable,
-                                      DIExpression *Expression,
-                                      const DILocation *DI,
-                                      Instruction *InsertBefore) {
-  return static_cast<DbgValueInst *>(DIB.insertDbgValueIntrinsic(
-      NewValue, Variable, Expression, DI, InsertBefore));
+static void createDebugValue(DIBuilder &DIB, Value *NewValue,
+                             DILocalVariable *Variable,
+                             DIExpression *Expression, const DILocation *DI,
+                             Instruction *InsertBefore) {
+  DIB.insertDbgValueIntrinsic(NewValue, Variable, Expression, DI, InsertBefore);
 }
 
 /// Helper for updating assignment tracking debug info when promoting allocas.
diff --git a/llvm/unittests/IR/IRBuilderTest.cpp b/llvm/unittests/IR/IRBuilderTest.cpp
index d15ff9dd51a4..cece65974c01 100644
--- a/llvm/unittests/IR/IRBuilderTest.cpp
+++ b/llvm/unittests/IR/IRBuilderTest.cpp
@@ -871,25 +871,139 @@ TEST_F(IRBuilderTest, createFunction) {
 }
 
 TEST_F(IRBuilderTest, DIBuilder) {
-  IRBuilder<> Builder(BB);
-  DIBuilder DIB(*M);
-  auto File = DIB.createFile("F.CBL", "/");
-  auto CU = DIB.createCompileUnit(dwarf::DW_LANG_Cobol74,
-                                  DIB.createFile("F.CBL", "/"), "llvm-cobol74",
-                                  true, "", 0);
-  auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray(std::nullopt));
-  auto SP = DIB.createFunction(
-      CU, "foo", "", File, 1, Type, 1, DINode::FlagZero,
-      DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
-  F->setSubprogram(SP);
-  AllocaInst *I = Builder.CreateAlloca(Builder.getInt8Ty());
-  auto BarSP = DIB.createFunction(
-      CU, "bar", "", File, 1, Type, 1, DINode::FlagZero,
-      DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
-  auto BadScope = DIB.createLexicalBlockFile(BarSP, File, 0);
-  I->setDebugLoc(DILocation::get(Ctx, 2, 0, BadScope));
-  DIB.finalize();
-  EXPECT_TRUE(verifyModule(*M));
+  auto GetLastDbgRecord = [](const Instruction *I) -> DbgRecord * {
+    if (I->getDbgValueRange().empty())
+      return nullptr;
+    return &*std::prev(I->getDbgValueRange().end());
+  };
+
+  auto ExpectOrder = [&](DbgInstPtr First, BasicBlock::iterator Second) {
+    if (M->IsNewDbgInfoFormat) {
+      EXPECT_TRUE(First.is<DbgRecord *>());
+      EXPECT_FALSE(Second->getDbgValueRange().empty());
+      EXPECT_EQ(GetLastDbgRecord(&*Second), First.get<DbgRecord *>());
+    } else {
+      EXPECT_TRUE(First.is<Instruction *>());
+      EXPECT_EQ(&*std::prev(Second), First.get<Instruction *>());
+    }
+  };
+
+  auto RunTest = [&]() {
+    IRBuilder<> Builder(BB);
+    DIBuilder DIB(*M);
+    auto File = DIB.createFile("F.CBL", "/");
+    auto CU = DIB.createCompileUnit(dwarf::DW_LANG_Cobol74,
+                                    DIB.createFile("F.CBL", "/"),
+                                    "llvm-cobol74", true, "", 0);
+    auto Type =
+        DIB.createSubroutineType(DIB.getOrCreateTypeArray(std::nullopt));
+    auto SP = DIB.createFunction(
+        CU, "foo", "", File, 1, Type, 1, DINode::FlagZero,
+        DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
+    F->setSubprogram(SP);
+    AllocaInst *I = Builder.CreateAlloca(Builder.getInt8Ty());
+    auto BarSP = DIB.createFunction(
+        CU, "bar", "", File, 1, Type, 1, DINode::FlagZero,
+        DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
+    auto BarScope = DIB.createLexicalBlockFile(BarSP, File, 0);
+    I->setDebugLoc(DILocation::get(Ctx, 2, 0, BarScope));
+
+    // Create another instruction so that there's one before the alloca we're
+    // inserting debug intrinsics before, to make end-checking easier.
+    I = Builder.CreateAlloca(Builder.getInt1Ty());
+
+    // Label metadata and records
+    // --------------------------
+    DILocation *LabelLoc = DILocation::get(Ctx, 1, 0, BarScope);
+    DILabel *AlwaysPreserveLabel = DIB.createLabel(
+        BarScope, "meles_meles", File, 1, /*AlwaysPreserve*/ true);
+    DILabel *Label =
+        DIB.createLabel(BarScope, "badger", File, 1, /*AlwaysPreserve*/ false);
+
+    { /* dbg.label | DPLabel */
+      // Insert before I and check order.
+      ExpectOrder(DIB.insertLabel(Label, LabelLoc, I), I->getIterator());
+
+      // We should be able to insert at the end of the block, even if there's
+      // no terminator yet. Note that in RemoveDIs mode this record won't get
+      // inserted into the block untill another instruction is added.
+      DbgInstPtr LabelRecord = DIB.insertLabel(Label, LabelLoc, BB);
+      // Specifically do not insert a terminator, to check this works. `I`
+      // should have absorbed the DPLabel in the new debug info mode.
+      I = Builder.CreateAlloca(Builder.getInt32Ty());
+      ExpectOrder(LabelRecord, I->getIterator());
+    }
+
+    // Variable metadata and records
+    // -----------------------------
+    DILocation *VarLoc = DILocation::get(Ctx, 2, 0, BarScope);
+    auto *IntType = DIB.createBasicType("int", 32, dwarf::DW_ATE_signed);
+    DILocalVariable *VarX =
+        DIB.createAutoVariable(BarSP, "X", File, 2, IntType, true);
+    DILocalVariable *VarY =
+        DIB.createAutoVariable(BarSP, "Y", File, 2, IntType, true);
+    { /* dbg.value | DPValue::Value */
+      ExpectOrder(DIB.insertDbgValueIntrinsic(I, VarX, DIB.createExpression(),
+                                              VarLoc, I),
+                  I->getIterator());
+      // Check inserting at end of the block works as with labels.
+      DbgInstPtr VarXValue = DIB.insertDbgValueIntrinsic(
+          I, VarX, DIB.createExpression(), VarLoc, BB);
+      I = Builder.CreateAlloca(Builder.getInt32Ty());
+      ExpectOrder(VarXValue, I->getIterator());
+      EXPECT_EQ(BB->getTrailingDPValues(), nullptr);
+    }
+    { /* dbg.declare | DPValue::Declare */
+      ExpectOrder(DIB.insertDeclare(I, VarY, DIB.createExpression(), VarLoc, I),
+                  I->getIterator());
+      // Check inserting at end of the block works as with labels.
+      DbgInstPtr VarYDeclare =
+          DIB.insertDeclare(I, VarY, DIB.createExpression(), VarLoc, BB);
+      I = Builder.CreateAlloca(Builder.getInt32Ty());
+      ExpectOrder(VarYDeclare, I->getIterator());
+      EXPECT_EQ(BB->getTrailingDPValues(), nullptr);
+    }
+    { /* dbg.assign | DPValue::Assign */
+      I = Builder.CreateAlloca(Builder.getInt32Ty());
+      I->setMetadata(LLVMContext::MD_DIAssignID, DIAssignID::getDistinct(Ctx));
+      // DbgAssign interface is slightly different - it always inserts after the
+      // linked instr. Check we can do this with no instruction to insert
+      // before.
+      DbgInstPtr VarXAssign =
+          DIB.insertDbgAssign(I, I, VarX, DIB.createExpression(), I,
+                              DIB.createExpression(), VarLoc);
+      I = Builder.CreateAlloca(Builder.getInt32Ty());
+      ExpectOrder(VarXAssign, I->getIterator());
+      EXPECT_EQ(BB->getTrailingDPValues(), nullptr);
+    }
+
+    Builder.CreateRet(nullptr);
+    DIB.finalize();
+    // Check the labels are not/are added to Bar's retainedNodes array
+    // (AlwaysPreserve).
+    EXPECT_EQ(find(BarSP->getRetainedNodes(), Label),
+              BarSP->getRetainedNodes().end());
+    EXPECT_NE(find(BarSP->getRetainedNodes(), AlwaysPreserveLabel),
+              BarSP->getRetainedNodes().end());
+    EXPECT_NE(find(BarSP->getRetainedNodes(), VarX),
+              BarSP->getRetainedNodes().end());
+    EXPECT_NE(find(BarSP->getRetainedNodes(), VarY),
+              BarSP->getRetainedNodes().end());
+    EXPECT_TRUE(verifyModule(*M));
+  };
+
+  // Test in old-debug mode.
+  EXPECT_FALSE(M->IsNewDbgInfoFormat);
+  RunTest();
+
+  // Test in new-debug mode.
+  // Reset the test then call convertToNewDbgValues to flip the flag
+  // on the test's Module, Function and BasicBlock.
+  TearDown();
+  SetUp();
+  M->convertToNewDbgValues();
+  EXPECT_TRUE(M->IsNewDbgInfoFormat);
+  RunTest();
 }
 
 TEST_F(IRBuilderTest, createArtificialSubprogram) {
-- 
cgit v1.2.3


From 19266ca389e3fc3bce9d24c074b836d6e69873ce Mon Sep 17 00:00:00 2001
From: Marius Brehler <marius.brehler@iml.fraunhofer.de>
Date: Tue, 12 Mar 2024 11:27:26 +0100
Subject: [mlir][EmitC] Add an `emitc.conditional` operator (#84883)

This adds an `emitc.conditional` operation for the ternary conditional
operator. Furthermore, this adds a converion from `arith.select` to the
new op.
---
 mlir/include/mlir/Dialect/EmitC/IR/EmitC.td        | 30 ++++++++++++++++++
 mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp  | 28 +++++++++++++++-
 mlir/lib/Target/Cpp/TranslateToCpp.cpp             | 37 ++++++++++++++++++----
 .../Conversion/ArithToEmitC/arith-to-emitc.mlir    |  8 +++++
 mlir/test/Dialect/EmitC/ops.mlir                   |  5 +++
 mlir/test/Target/Cpp/conditional.mlir              |  9 ++++++
 6 files changed, 110 insertions(+), 7 deletions(-)
 create mode 100644 mlir/test/Target/Cpp/conditional.mlir

diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td
index ac1e38a5506d..ec842f76628c 100644
--- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td
+++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td
@@ -908,6 +908,36 @@ def EmitC_SubOp : EmitC_BinaryOp<"sub", [CExpression]> {
   let hasVerifier = 1;
 }
 
+def EmitC_ConditionalOp : EmitC_Op<"conditional",
+    [AllTypesMatch<["true_value", "false_value", "result"]>, CExpression]> {
+  let summary = "Conditional (ternary) operation";
+  let description = [{
+    With the `conditional` operation the ternary conditional operator can
+    be applied.
+
+    Example:
+
+    ```mlir
+    %0 = emitc.cmp gt, %arg0, %arg1 : (i32, i32) -> i1
+
+    %c0 = "emitc.constant"() {value = 10 : i32} : () -> i32
+    %c1 = "emitc.constant"() {value = 11 : i32} : () -> i32
+
+    %1 = emitc.conditional %0, %c0, %c1 : i32
+    ```
+    ```c++
+    // Code emitted for the operations above.
+    bool v3 = v1 > v2;
+    int32_t v4 = 10;
+    int32_t v5 = 11;
+    int32_t v6 = v3 ? v4 : v5;
+    ```
+  }];
+  let arguments = (ins I1:$condition, AnyType:$true_value, AnyType:$false_value);
+  let results = (outs AnyType:$result);
+  let assemblyFormat = "operands attr-dict `:` type($result)";
+}
+
 def EmitC_UnaryMinusOp : EmitC_UnaryOp<"unary_minus", [CExpression]> {
   let summary = "Unary minus operation";
   let description = [{
diff --git a/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp b/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp
index 40dce001a3b2..3532785c31b9 100644
--- a/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp
+++ b/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp
@@ -54,6 +54,31 @@ public:
     return success();
   }
 };
+
+class SelectOpConversion : public OpConversionPattern<arith::SelectOp> {
+public:
+  using OpConversionPattern<arith::SelectOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(arith::SelectOp selectOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+
+    Type dstType = getTypeConverter()->convertType(selectOp.getType());
+    if (!dstType)
+      return rewriter.notifyMatchFailure(selectOp, "type conversion failed");
+
+    if (!adaptor.getCondition().getType().isInteger(1))
+      return rewriter.notifyMatchFailure(
+          selectOp,
+          "can only be converted if condition is a scalar of type i1");
+
+    rewriter.replaceOpWithNewOp<emitc::ConditionalOp>(selectOp, dstType,
+                                                      adaptor.getOperands());
+
+    return success();
+  }
+};
+
 } // namespace
 
 //===----------------------------------------------------------------------===//
@@ -70,7 +95,8 @@ void mlir::populateArithToEmitCPatterns(TypeConverter &typeConverter,
     ArithOpConversion<arith::AddFOp, emitc::AddOp>,
     ArithOpConversion<arith::DivFOp, emitc::DivOp>,
     ArithOpConversion<arith::MulFOp, emitc::MulOp>,
-    ArithOpConversion<arith::SubFOp, emitc::SubOp>
+    ArithOpConversion<arith::SubFOp, emitc::SubOp>,
+    SelectOpConversion
   >(typeConverter, ctx);
   // clang-format on
 }
diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
index 3cf137c1d07c..7cbb1e9265e1 100644
--- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp
+++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
@@ -96,6 +96,7 @@ static FailureOr<int> getOperatorPrecedence(Operation *operation) {
         }
         return op->emitError("unsupported cmp predicate");
       })
+      .Case<emitc::ConditionalOp>([&](auto op) { return 2; })
       .Case<emitc::DivOp>([&](auto op) { return 13; })
       .Case<emitc::LogicalAndOp>([&](auto op) { return 4; })
       .Case<emitc::LogicalNotOp>([&](auto op) { return 15; })
@@ -446,6 +447,29 @@ static LogicalResult printOperation(CppEmitter &emitter, emitc::CmpOp cmpOp) {
   return printBinaryOperation(emitter, operation, binaryOperator);
 }
 
+static LogicalResult printOperation(CppEmitter &emitter,
+                                    emitc::ConditionalOp conditionalOp) {
+  raw_ostream &os = emitter.ostream();
+
+  if (failed(emitter.emitAssignPrefix(*conditionalOp)))
+    return failure();
+
+  if (failed(emitter.emitOperand(conditionalOp.getCondition())))
+    return failure();
+
+  os << " ? ";
+
+  if (failed(emitter.emitOperand(conditionalOp.getTrueValue())))
+    return failure();
+
+  os << " : ";
+
+  if (failed(emitter.emitOperand(conditionalOp.getFalseValue())))
+    return failure();
+
+  return success();
+}
+
 static LogicalResult printOperation(CppEmitter &emitter,
                                     emitc::VerbatimOp verbatimOp) {
   raw_ostream &os = emitter.ostream();
@@ -1383,12 +1407,13 @@ LogicalResult CppEmitter::emitOperation(Operation &op, bool trailingSemicolon) {
                 emitc::BitwiseNotOp, emitc::BitwiseOrOp,
                 emitc::BitwiseRightShiftOp, emitc::BitwiseXorOp, emitc::CallOp,
                 emitc::CallOpaqueOp, emitc::CastOp, emitc::CmpOp,
-                emitc::ConstantOp, emitc::DeclareFuncOp, emitc::DivOp,
-                emitc::ExpressionOp, emitc::ForOp, emitc::FuncOp, emitc::IfOp,
-                emitc::IncludeOp, emitc::LogicalAndOp, emitc::LogicalNotOp,
-                emitc::LogicalOrOp, emitc::MulOp, emitc::RemOp, emitc::ReturnOp,
-                emitc::SubOp, emitc::UnaryMinusOp, emitc::UnaryPlusOp,
-                emitc::VariableOp, emitc::VerbatimOp>(
+                emitc::ConditionalOp, emitc::ConstantOp, emitc::DeclareFuncOp,
+                emitc::DivOp, emitc::ExpressionOp, emitc::ForOp, emitc::FuncOp,
+                emitc::IfOp, emitc::IncludeOp, emitc::LogicalAndOp,
+                emitc::LogicalNotOp, emitc::LogicalOrOp, emitc::MulOp,
+                emitc::RemOp, emitc::ReturnOp, emitc::SubOp,
+                emitc::UnaryMinusOp, emitc::UnaryPlusOp, emitc::VariableOp,
+                emitc::VerbatimOp>(
               [&](auto op) { return printOperation(*this, op); })
           // Func ops.
           .Case<func::CallOp, func::FuncOp, func::ReturnOp>(
diff --git a/mlir/test/Conversion/ArithToEmitC/arith-to-emitc.mlir b/mlir/test/Conversion/ArithToEmitC/arith-to-emitc.mlir
index 2886810c01e9..022530ef4db8 100644
--- a/mlir/test/Conversion/ArithToEmitC/arith-to-emitc.mlir
+++ b/mlir/test/Conversion/ArithToEmitC/arith-to-emitc.mlir
@@ -34,3 +34,11 @@ func.func @arith_ops(%arg0: f32, %arg1: f32) {
 
   return
 }
+
+// -----
+
+func.func @arith_select(%arg0: i1, %arg1: tensor<8xi32>, %arg2: tensor<8xi32>) -> () {
+  // CHECK: [[V0:[^ ]*]] = emitc.conditional %arg0, %arg1, %arg2 : tensor<8xi32>
+  %0 = arith.select %arg0, %arg1, %arg2 : i1, tensor<8xi32>
+  return
+}
diff --git a/mlir/test/Dialect/EmitC/ops.mlir b/mlir/test/Dialect/EmitC/ops.mlir
index 122b1d9ef105..5f00a295ed74 100644
--- a/mlir/test/Dialect/EmitC/ops.mlir
+++ b/mlir/test/Dialect/EmitC/ops.mlir
@@ -71,6 +71,11 @@ func.func @bitwise(%arg0: i32, %arg1: i32) -> () {
   return
 }
 
+func.func @cond(%cond: i1, %arg0: i32, %arg1: i32) -> () {
+  %0 = emitc.conditional %cond, %arg0, %arg1 : i32
+  return
+}
+
 func.func @div_int(%arg0: i32, %arg1: i32) {
   %1 = "emitc.div" (%arg0, %arg1) : (i32, i32) -> i32
   return
diff --git a/mlir/test/Target/Cpp/conditional.mlir b/mlir/test/Target/Cpp/conditional.mlir
new file mode 100644
index 000000000000..2470fbeb33ad
--- /dev/null
+++ b/mlir/test/Target/Cpp/conditional.mlir
@@ -0,0 +1,9 @@
+// RUN: mlir-translate -mlir-to-cpp %s | FileCheck %s
+
+func.func @cond(%cond: i1, %arg0: i32, %arg1: i32) -> () {
+  %0 = emitc.conditional %cond, %arg0, %arg1 : i32
+  return
+}
+
+// CHECK-LABEL: void cond
+// CHECK-NEXT:  int32_t [[V3:[^ ]*]] = [[V0:[^ ]*]] ? [[V1:[^ ]*]] : [[V2:[^ ]*]];
-- 
cgit v1.2.3


From 23ffb2bdb96cf5a8eebce86b1ab21acf88979661 Mon Sep 17 00:00:00 2001
From: Dave Abrahams <dabrahams@adobe.com>
Date: Tue, 12 Mar 2024 03:30:43 -0700
Subject: [CMake] Enable new policy for CMAKE_MSVC_DEBUG_INFORMATION_FORMAT
 (#82371)

---
 cmake/Modules/CMakePolicy.cmake | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/cmake/Modules/CMakePolicy.cmake b/cmake/Modules/CMakePolicy.cmake
index 0ec32ad8637f..b5bd1e6cdffe 100644
--- a/cmake/Modules/CMakePolicy.cmake
+++ b/cmake/Modules/CMakePolicy.cmake
@@ -10,3 +10,16 @@ endif()
 if(POLICY CMP0116)
   cmake_policy(SET CMP0116 OLD)
 endif()
+
+# MSVC debug information format flags are selected via
+# CMAKE_MSVC_DEBUG_INFORMATION_FORMAT, instead of
+# embedding flags in e.g. CMAKE_CXX_FLAGS_RELEASE.
+# New in CMake 3.25.
+#
+# Supports debug info with SCCache
+# (https://github.com/mozilla/sccache?tab=readme-ov-file#usage)
+# avoiding “fatal error C1041: cannot open program database; if
+# multiple CL.EXE write to the same .PDB file, please use /FS"
++if(POLICY CMP0141)
++  cmake_policy(SET CMP0141 NEW)
++endif()
-- 
cgit v1.2.3


From 5a100551d5652da586800c67edd4ccb3b18b10dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Tue, 12 Mar 2024 12:32:48 +0200
Subject: [Analysis] Treat ldexpf() as missing on MinGW (#84748)

The function does exist, but it is a plain wrapper over regular ldexp(),
so there's no benefit in calling it over regular ldexp(). Therefore,
treat it as missing.

This fixes builds of Wine for aarch64 with Clang in mingw mode, which
regressed recently in 8d976c7f20fe8d92fe6f54af411594e15fac25ae. That
commit unlocked transforming calls to ldexp into ldexpf, for some
codepaths within Wine. Wine can use compilers in mingw mode without
the regular mingw runtime libraries, which caused this to fail.
(However, if the transformation to use ldexpf() would have made sense,
the right fix would have been for Wine to provide a similar
ldexpf->ldexp wrapper just like mingw does.)
---
 llvm/lib/Analysis/TargetLibraryInfo.cpp    | 5 +++++
 llvm/test/Transforms/InstCombine/exp2-1.ll | 1 +
 2 files changed, 6 insertions(+)

diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 835268bb2d85..c8195584ade3 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -456,6 +456,11 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
     TLI.setUnavailable(LibFunc_uname);
     TLI.setUnavailable(LibFunc_unsetenv);
     TLI.setUnavailable(LibFunc_utimes);
+
+    // MinGW does have ldexpf, but it is a plain wrapper over regular ldexp.
+    // Therefore it's not beneficial to transform code to use it, i.e.
+    // just pretend that the function is not available.
+    TLI.setUnavailable(LibFunc_ldexpf);
   }
 
   // Pick just one set of new/delete variants.
diff --git a/llvm/test/Transforms/InstCombine/exp2-1.ll b/llvm/test/Transforms/InstCombine/exp2-1.ll
index 79aeded2fa5c..8419854d3ec6 100644
--- a/llvm/test/Transforms/InstCombine/exp2-1.ll
+++ b/llvm/test/Transforms/InstCombine/exp2-1.ll
@@ -4,6 +4,7 @@
 ; RUN: opt < %s -passes=instcombine -S -mtriple=unknown | FileCheck %s -check-prefixes=LDEXP32
 ; RUN: opt < %s -passes=instcombine -S -mtriple=msp430 | FileCheck %s -check-prefixes=LDEXP16
 ; RUN: opt < %s -passes=instcombine -S -mtriple=i386-pc-win32 | FileCheck %s -check-prefixes=NOLDEXPF
+; RUN: opt < %s -passes=instcombine -S -mtriple=i386-windows-gnu | FileCheck %s -check-prefixes=NOLDEXPF
 ; RUN: opt < %s -passes=instcombine -S -mtriple=amdgcn-unknown-unknown | FileCheck %s -check-prefixes=NOLDEXP
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-- 
cgit v1.2.3


From 1b945e35a6a59fda436585f8fca12c82a27fc6a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Tue, 12 Mar 2024 12:35:35 +0200
Subject: [CMake] Fix a typo in 23ffb2bdb96cf5a8eebce86b1ab21acf88979661

---
 cmake/Modules/CMakePolicy.cmake | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cmake/Modules/CMakePolicy.cmake b/cmake/Modules/CMakePolicy.cmake
index b5bd1e6cdffe..1c18c1810dae 100644
--- a/cmake/Modules/CMakePolicy.cmake
+++ b/cmake/Modules/CMakePolicy.cmake
@@ -20,6 +20,6 @@ endif()
 # (https://github.com/mozilla/sccache?tab=readme-ov-file#usage)
 # avoiding “fatal error C1041: cannot open program database; if
 # multiple CL.EXE write to the same .PDB file, please use /FS"
-+if(POLICY CMP0141)
-+  cmake_policy(SET CMP0141 NEW)
-+endif()
+if(POLICY CMP0141)
+  cmake_policy(SET CMP0141 NEW)
+endif()
-- 
cgit v1.2.3


From 3358838446428976a41390fde98fe5b04b08a132 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 12 Mar 2024 10:41:59 +0000
Subject: [ADT] Add APIntOps::abds signed absolute difference and rename
 absdiff -> abdu (#84791)

When I created APIntOps::absdiff, I totally missed that we already have ISD::ABDS/ABDU nodes, and we use this term in other places/targets as well.

I've added the APIntOps::abds implementation and renamed APIntOps::absdiff to APIntOps::abdu.

Given that APIntOps::absdiff is so young I don't think we need to create a deprecation wrapper, but I can if anyone thinks it important.

I'll do a KnownBits rename patch after this.
---
 llvm/include/llvm/ADT/APInt.h                  |  7 ++-
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  4 +-
 llvm/unittests/ADT/APIntTest.cpp               | 66 +++++++++++++++++++-------
 llvm/unittests/Support/KnownBitsTest.cpp       |  4 +-
 4 files changed, 59 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 1fc3c7b2236a..bea3e28adf30 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -2188,8 +2188,13 @@ inline const APInt &umax(const APInt &A, const APInt &B) {
   return A.ugt(B) ? A : B;
 }
 
+/// Determine the absolute difference of two APInts considered to be signed.
+inline const APInt abds(const APInt &A, const APInt &B) {
+  return A.sge(B) ? (A - B) : (B - A);
+}
+
 /// Determine the absolute difference of two APInts considered to be unsigned.
-inline const APInt absdiff(const APInt &A, const APInt &B) {
+inline const APInt abdu(const APInt &A, const APInt &B) {
   return A.uge(B) ? (A - B) : (B - A);
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7a0c1c328df1..c24303592769 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6068,9 +6068,9 @@ static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
     return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
   }
   case ISD::ABDS:
-    return APIntOps::smax(C1, C2) - APIntOps::smin(C1, C2);
+    return APIntOps::abds(C1, C2);
   case ISD::ABDU:
-    return APIntOps::umax(C1, C2) - APIntOps::umin(C1, C2);
+    return APIntOps::abdu(C1, C2);
   }
   return std::nullopt;
 }
diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp
index 24324822356b..11237d2e1602 100644
--- a/llvm/unittests/ADT/APIntTest.cpp
+++ b/llvm/unittests/ADT/APIntTest.cpp
@@ -2532,38 +2532,72 @@ TEST(APIntTest, clearLowBits) {
   EXPECT_EQ(16u, i32hi16.popcount());
 }
 
-TEST(APIntTest, AbsDiff) {
-  using APIntOps::absdiff;
+TEST(APIntTest, abds) {
+  using APIntOps::abds;
 
   APInt MaxU1(1, 1, false);
   APInt MinU1(1, 0, false);
-  EXPECT_EQ(1u, absdiff(MaxU1, MinU1).getZExtValue());
-  EXPECT_EQ(1u, absdiff(MinU1, MaxU1).getZExtValue());
+  EXPECT_EQ(1u, abds(MaxU1, MinU1).getZExtValue());
+  EXPECT_EQ(1u, abds(MinU1, MaxU1).getZExtValue());
 
   APInt MaxU4(4, 15, false);
   APInt MinU4(4, 0, false);
-  EXPECT_EQ(15u, absdiff(MaxU4, MinU4).getZExtValue());
-  EXPECT_EQ(15u, absdiff(MinU4, MaxU4).getZExtValue());
+  EXPECT_EQ(1, abds(MaxU4, MinU4).getSExtValue());
+  EXPECT_EQ(1, abds(MinU4, MaxU4).getSExtValue());
 
   APInt MaxS8(8, 127, true);
   APInt MinS8(8, -128, true);
-  EXPECT_EQ(1u, absdiff(MaxS8, MinS8).getZExtValue());
-  EXPECT_EQ(1u, absdiff(MinS8, MaxS8).getZExtValue());
+  EXPECT_EQ(-1, abds(MaxS8, MinS8).getSExtValue());
+  EXPECT_EQ(-1, abds(MinS8, MaxS8).getSExtValue());
 
   APInt MaxU16(16, 65535, false);
   APInt MinU16(16, 0, false);
-  EXPECT_EQ(65535u, absdiff(MaxU16, MinU16).getZExtValue());
-  EXPECT_EQ(65535u, absdiff(MinU16, MaxU16).getZExtValue());
+  EXPECT_EQ(1, abds(MaxU16, MinU16).getSExtValue());
+  EXPECT_EQ(1, abds(MinU16, MaxU16).getSExtValue());
 
   APInt MaxS16(16, 32767, true);
   APInt MinS16(16, -32768, true);
   APInt ZeroS16(16, 0, true);
-  EXPECT_EQ(1u, absdiff(MaxS16, MinS16).getZExtValue());
-  EXPECT_EQ(1u, absdiff(MinS16, MaxS16).getZExtValue());
-  EXPECT_EQ(32768u, absdiff(ZeroS16, MinS16));
-  EXPECT_EQ(32768u, absdiff(MinS16, ZeroS16));
-  EXPECT_EQ(32767u, absdiff(ZeroS16, MaxS16));
-  EXPECT_EQ(32767u, absdiff(MaxS16, ZeroS16));
+  EXPECT_EQ(-1, abds(MaxS16, MinS16).getSExtValue());
+  EXPECT_EQ(-1, abds(MinS16, MaxS16).getSExtValue());
+  EXPECT_EQ(32768u, abds(ZeroS16, MinS16));
+  EXPECT_EQ(32768u, abds(MinS16, ZeroS16));
+  EXPECT_EQ(32767u, abds(ZeroS16, MaxS16));
+  EXPECT_EQ(32767u, abds(MaxS16, ZeroS16));
+}
+
+TEST(APIntTest, abdu) {
+  using APIntOps::abdu;
+
+  APInt MaxU1(1, 1, false);
+  APInt MinU1(1, 0, false);
+  EXPECT_EQ(1u, abdu(MaxU1, MinU1).getZExtValue());
+  EXPECT_EQ(1u, abdu(MinU1, MaxU1).getZExtValue());
+
+  APInt MaxU4(4, 15, false);
+  APInt MinU4(4, 0, false);
+  EXPECT_EQ(15u, abdu(MaxU4, MinU4).getZExtValue());
+  EXPECT_EQ(15u, abdu(MinU4, MaxU4).getZExtValue());
+
+  APInt MaxS8(8, 127, true);
+  APInt MinS8(8, -128, true);
+  EXPECT_EQ(1u, abdu(MaxS8, MinS8).getZExtValue());
+  EXPECT_EQ(1u, abdu(MinS8, MaxS8).getZExtValue());
+
+  APInt MaxU16(16, 65535, false);
+  APInt MinU16(16, 0, false);
+  EXPECT_EQ(65535u, abdu(MaxU16, MinU16).getZExtValue());
+  EXPECT_EQ(65535u, abdu(MinU16, MaxU16).getZExtValue());
+
+  APInt MaxS16(16, 32767, true);
+  APInt MinS16(16, -32768, true);
+  APInt ZeroS16(16, 0, true);
+  EXPECT_EQ(1u, abdu(MaxS16, MinS16).getZExtValue());
+  EXPECT_EQ(1u, abdu(MinS16, MaxS16).getZExtValue());
+  EXPECT_EQ(32768u, abdu(ZeroS16, MinS16));
+  EXPECT_EQ(32768u, abdu(MinS16, ZeroS16));
+  EXPECT_EQ(32767u, abdu(ZeroS16, MaxS16));
+  EXPECT_EQ(32767u, abdu(MaxS16, ZeroS16));
 }
 
 TEST(APIntTest, GCD) {
diff --git a/llvm/unittests/Support/KnownBitsTest.cpp b/llvm/unittests/Support/KnownBitsTest.cpp
index 7c183e9626f9..2ac25f0b2801 100644
--- a/llvm/unittests/Support/KnownBitsTest.cpp
+++ b/llvm/unittests/Support/KnownBitsTest.cpp
@@ -361,9 +361,7 @@ TEST(KnownBitsTest, BinaryExhaustive) {
       [](const KnownBits &Known1, const KnownBits &Known2) {
         return KnownBits::absdiff(Known1, Known2);
       },
-      [](const APInt &N1, const APInt &N2) {
-        return APIntOps::absdiff(N1, N2);
-      },
+      [](const APInt &N1, const APInt &N2) { return APIntOps::abdu(N1, N2); },
       checkCorrectnessOnlyBinary);
   testBinaryOpExhaustive(
       [](const KnownBits &Known1, const KnownBits &Known2) {
-- 
cgit v1.2.3


From a7ef83f005beeb3b1c7f34d44167b5abc5b6c4e5 Mon Sep 17 00:00:00 2001
From: Dani <daniel.kiss@arm.com>
Date: Tue, 12 Mar 2024 12:36:05 +0100
Subject: [AArch64][SME] Add BTI and No Exec Stack markers to sme-abi.S
 (#84895)

Adding BTI landing pads compiler-rt is built with -mbranch-protectoin.
Tabulators are changed to 2 spaces for consistency.
---
 compiler-rt/lib/builtins/aarch64/sme-abi.S | 34 +++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index d470ecaf7aaa..4c0ff66931db 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -26,9 +26,10 @@
 // abort(). Note that there is no need to preserve any state before the call,
 // because the function does not return.
 DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
-.cfi_startproc
-	.variant_pcs SYMBOL_NAME(do_abort)
-	stp	x29, x30, [sp, #-32]!
+  .cfi_startproc
+  .variant_pcs SYMBOL_NAME(do_abort)
+  BTI_C
+  stp  x29, x30, [sp, #-32]!
   cntd x0
   // Store VG to a stack location that we describe with .cfi_offset
   str x0, [sp, #16]
@@ -36,22 +37,23 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
   .cfi_offset w30, -24
   .cfi_offset w29, -32
   .cfi_offset 46, -16
-	bl	__arm_sme_state
-	tbz	x0, #0, 2f
+  bl  __arm_sme_state
+  tbz  x0, #0, 2f
 1:
-	smstop sm
+  smstop sm
 2:
   // We can't make this into a tail-call because the unwinder would
   // need to restore the value of VG.
-	bl	SYMBOL_NAME(abort)
-.cfi_endproc
+  bl  SYMBOL_NAME(abort)
+  .cfi_endproc
 END_COMPILERRT_FUNCTION(do_abort)
 
 // __arm_sme_state fills the result registers based on a local
 // that is set as part of the compiler-rt startup code.
 //   __aarch64_has_sme_and_tpidr2_el0
 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
-	.variant_pcs __arm_sme_state
+  .variant_pcs __arm_sme_state
+  BTI_C
   mov x0, xzr
   mov x1, xzr
 
@@ -68,7 +70,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
 END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state)
 
 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore)
-	.variant_pcs __arm_tpidr2_restore
+  .variant_pcs __arm_tpidr2_restore
+  BTI_C
   // If TPIDR2_EL0 is nonnull, the subroutine aborts in some platform-specific
   // manner.
   mrs x14, TPIDR2_EL0
@@ -103,7 +106,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore)
 END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore)
 
 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
-	.variant_pcs __arm_tpidr2_restore
+  .variant_pcs __arm_tpidr2_restore
+  BTI_C
   // If the current thread does not have access to TPIDR2_EL0, the subroutine
   // does nothing.
   adrp  x14, TPIDR2_SYMBOL
@@ -143,7 +147,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
 END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save)
 
 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
-	.variant_pcs __arm_tpidr2_restore
+  .variant_pcs __arm_tpidr2_restore
+  BTI_C
   // If the current thread does not have access to SME, the subroutine does
   // nothing.
   adrp  x14, TPIDR2_SYMBOL
@@ -174,3 +179,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
 0:
   ret
 END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)
+
+NO_EXEC_STACK_DIRECTIVE
+
+// GNU property note for BTI and PAC
+GNU_PROPERTY_BTI_PAC
-- 
cgit v1.2.3


From eb319708dc5371bc560c301742abcf94cc5b3de5 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Tue, 12 Mar 2024 07:40:27 -0400
Subject: Add bit-precise overloads for builtin operators  (#84755)

We previously were not adding them to the candidate set and so use of a
bit-precise integer as a class member could lead to ambiguous overload
sets.

Fixes https://github.com/llvm/llvm-project/issues/82998
---
 clang/docs/ReleaseNotes.rst            |  3 +++
 clang/lib/Sema/SemaOverload.cpp        | 29 +++++++++++++++++++++--
 clang/test/SemaCXX/overload-bitint.cpp | 42 ++++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/SemaCXX/overload-bitint.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 88e552d5c461..4a08b78d78b6 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -265,6 +265,9 @@ Bug Fixes in This Version
   operator.
   Fixes (#GH83267).
 
+- Clang now correctly generates overloads for bit-precise integer types for
+  builtin operators in C++. Fixes #GH82998.
+
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index b0c693f078ef..f6bd85bdc646 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -8516,6 +8516,9 @@ class BuiltinCandidateTypeSet  {
   /// candidates.
   TypeSet MatrixTypes;
 
+  /// The set of _BitInt types that will be used in the built-in candidates.
+  TypeSet BitIntTypes;
+
   /// A flag indicating non-record types are viable candidates
   bool HasNonRecordTypes;
 
@@ -8564,6 +8567,7 @@ public:
   }
   llvm::iterator_range<iterator> vector_types() { return VectorTypes; }
   llvm::iterator_range<iterator> matrix_types() { return MatrixTypes; }
+  llvm::iterator_range<iterator> bitint_types() { return BitIntTypes; }
 
   bool containsMatrixType(QualType Ty) const { return MatrixTypes.count(Ty); }
   bool hasNonRecordTypes() { return HasNonRecordTypes; }
@@ -8735,6 +8739,9 @@ BuiltinCandidateTypeSet::AddTypesConvertedFrom(QualType Ty,
   } else if (Ty->isEnumeralType()) {
     HasArithmeticOrEnumeralTypes = true;
     EnumerationTypes.insert(Ty);
+  } else if (Ty->isBitIntType()) {
+    HasArithmeticOrEnumeralTypes = true;
+    BitIntTypes.insert(Ty);
   } else if (Ty->isVectorType()) {
     // We treat vector types as arithmetic types in many contexts as an
     // extension.
@@ -8913,7 +8920,7 @@ class BuiltinOperatorOverloadBuilder {
   SmallVectorImpl<BuiltinCandidateTypeSet> &CandidateTypes;
   OverloadCandidateSet &CandidateSet;
 
-  static constexpr int ArithmeticTypesCap = 24;
+  static constexpr int ArithmeticTypesCap = 26;
   SmallVector<CanQualType, ArithmeticTypesCap> ArithmeticTypes;
 
   // Define some indices used to iterate over the arithmetic types in
@@ -8955,6 +8962,20 @@ class BuiltinOperatorOverloadBuilder {
         (S.Context.getAuxTargetInfo() &&
          S.Context.getAuxTargetInfo()->hasInt128Type()))
       ArithmeticTypes.push_back(S.Context.UnsignedInt128Ty);
+
+    /// We add candidates for the unique, unqualified _BitInt types present in
+    /// the candidate type set. The candidate set already handled ensuring the
+    /// type is unqualified and canonical, but because we're adding from N
+    /// different sets, we need to do some extra work to unique things. Insert
+    /// the candidates into a unique set, then move from that set into the list
+    /// of arithmetic types.
+    llvm::SmallSetVector<CanQualType, 2> BitIntCandidates;
+    llvm::for_each(CandidateTypes, [&BitIntCandidates](
+                                       BuiltinCandidateTypeSet &Candidate) {
+      for (QualType BitTy : Candidate.bitint_types())
+        BitIntCandidates.insert(CanQualType::CreateUnsafe(BitTy));
+    });
+    llvm::move(BitIntCandidates, std::back_inserter(ArithmeticTypes));
     LastPromotedIntegralType = ArithmeticTypes.size();
     LastPromotedArithmeticType = ArithmeticTypes.size();
     // End of promoted types.
@@ -8975,7 +8996,11 @@ class BuiltinOperatorOverloadBuilder {
     // End of integral types.
     // FIXME: What about complex? What about half?
 
-    assert(ArithmeticTypes.size() <= ArithmeticTypesCap &&
+    // We don't know for sure how many bit-precise candidates were involved, so
+    // we subtract those from the total when testing whether we're under the
+    // cap or not.
+    assert(ArithmeticTypes.size() - BitIntCandidates.size() <=
+               ArithmeticTypesCap &&
            "Enough inline storage for all arithmetic types.");
   }
 
diff --git a/clang/test/SemaCXX/overload-bitint.cpp b/clang/test/SemaCXX/overload-bitint.cpp
new file mode 100644
index 000000000000..b834a3b01fed
--- /dev/null
+++ b/clang/test/SemaCXX/overload-bitint.cpp
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 -std=c++20 %s -verify
+// expected-no-diagnostics
+
+#include "Inputs/std-compare.h"
+
+struct S {
+  _BitInt(12) a;
+
+  constexpr operator _BitInt(12)() const { return a; }
+};
+
+// None of these used to compile because we weren't adding _BitInt types to the
+// overload set for builtin operators. See GH82998.
+static_assert(S{10} < 11);
+static_assert(S{10} <= 11);
+static_assert(S{12} > 11);
+static_assert(S{12} >= 11);
+static_assert(S{10} == 10);
+static_assert((S{10} <=> 10) == 0);
+static_assert(S{10} != 11);
+static_assert(S{10} + 0 == 10);
+static_assert(S{10} - 0 == 10);
+static_assert(S{10} * 1 == 10);
+static_assert(S{10} / 1 == 10);
+static_assert(S{10} % 1 == 0);
+static_assert(S{10} << 0 == 10);
+static_assert(S{10} >> 0 == 10);
+static_assert((S{10} | 0) == 10);
+static_assert((S{10} & 10) == 10);
+static_assert((S{10} ^ 0) == 10);
+static_assert(-S{10} == -10);
+static_assert(+S{10} == +10);
+static_assert(~S{10} == ~10);
+
+struct A {
+  _BitInt(12) a;
+
+  bool operator==(const A&) const = default;
+  bool operator!=(const A&) const = default;
+  std::strong_ordering operator<=>(const A&) const = default;
+};
+
-- 
cgit v1.2.3


From 4e3310a81391fbc283d263715a68d8732e73d01d Mon Sep 17 00:00:00 2001
From: mikaoP <raul.penacoba@bsc.es>
Date: Tue, 12 Mar 2024 12:50:35 +0100
Subject: [clang] Fix OMPT ident flag in combined distribute parallel for
 pragma (#80987)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Authored-by: Raúl Peñacoba Veigas <rpenacob@bsc.es>
---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp              |  3 +
 clang/lib/CodeGen/CGStmtOpenMP.cpp                 | 15 ++--
 clang/lib/CodeGen/CodeGenFunction.h                |  2 +
 clang/test/OpenMP/amdgcn_target_device_vla.cpp     |  2 +-
 .../OpenMP/amdgpu_target_with_aligned_attribute.c  |  2 +-
 clang/test/OpenMP/bug60602.cpp                     |  2 +-
 .../OpenMP/distribute_parallel_for_codegen.cpp     | 64 +++++++--------
 ...istribute_parallel_for_firstprivate_codegen.cpp | 12 +--
 .../OpenMP/distribute_parallel_for_if_codegen.cpp  | 16 ++--
 ...distribute_parallel_for_lastprivate_codegen.cpp | 12 +--
 ...distribute_parallel_for_num_threads_codegen.cpp | 48 +++++------
 .../distribute_parallel_for_private_codegen.cpp    | 12 +--
 .../distribute_parallel_for_proc_bind_codegen.cpp  |  6 +-
 ...tribute_parallel_for_reduction_task_codegen.cpp | 34 ++++----
 .../distribute_parallel_for_simd_codegen.cpp       | 60 +++++++-------
 ...bute_parallel_for_simd_firstprivate_codegen.cpp | 12 +--
 .../distribute_parallel_for_simd_if_codegen.cpp    | 68 +++++++--------
 ...ibute_parallel_for_simd_lastprivate_codegen.cpp | 12 +--
 ...ibute_parallel_for_simd_num_threads_codegen.cpp | 48 +++++------
 ...istribute_parallel_for_simd_private_codegen.cpp | 12 +--
 ...tribute_parallel_for_simd_proc_bind_codegen.cpp |  6 +-
 .../OpenMP/metadirective_device_arch_codegen.cpp   |  2 +-
 clang/test/OpenMP/nvptx_SPMD_codegen.cpp           | 96 +++++++++++-----------
 ...tx_distribute_parallel_generic_mode_codegen.cpp |  4 +-
 ...arget_teams_distribute_parallel_for_codegen.cpp | 36 ++++----
 ...istribute_parallel_for_generic_mode_codegen.cpp |  4 +-
 ..._teams_distribute_parallel_for_simd_codegen.cpp | 16 ++--
 .../nvptx_target_teams_generic_loop_codegen.cpp    | 36 ++++----
 ...get_teams_generic_loop_generic_mode_codegen.cpp |  8 +-
 clang/test/OpenMP/reduction_implicit_map.cpp       |  4 +-
 .../OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp  |  8 +-
 ...arget_teams_distribute_parallel_for_codegen.cpp | 30 +++----
 ...ms_distribute_parallel_for_collapse_codegen.cpp | 12 +--
 ...stribute_parallel_for_dist_schedule_codegen.cpp | 36 ++++----
 ...istribute_parallel_for_firstprivate_codegen.cpp | 20 ++---
 ...et_teams_distribute_parallel_for_if_codegen.cpp | 16 ++--
 ...distribute_parallel_for_lastprivate_codegen.cpp | 12 +--
 ...teams_distribute_parallel_for_order_codegen.cpp |  2 +-
 ...ams_distribute_parallel_for_private_codegen.cpp | 20 ++---
 ...s_distribute_parallel_for_proc_bind_codegen.cpp |  6 +-
 ...s_distribute_parallel_for_reduction_codegen.cpp | 10 +--
 ...tribute_parallel_for_reduction_task_codegen.cpp | 38 ++++-----
 ...ms_distribute_parallel_for_schedule_codegen.cpp | 72 ++++++++--------
 ..._teams_distribute_parallel_for_simd_codegen.cpp | 24 +++---
 ...stribute_parallel_for_simd_collapse_codegen.cpp | 12 +--
 ...ute_parallel_for_simd_dist_schedule_codegen.cpp | 36 ++++----
 ...bute_parallel_for_simd_firstprivate_codegen.cpp | 20 ++---
 ...ams_distribute_parallel_for_simd_if_codegen.cpp | 68 +++++++--------
 ...ibute_parallel_for_simd_lastprivate_codegen.cpp | 12 +--
 ...istribute_parallel_for_simd_private_codegen.cpp | 20 ++---
 ...tribute_parallel_for_simd_proc_bind_codegen.cpp |  6 +-
 ...tribute_parallel_for_simd_reduction_codegen.cpp | 10 +--
 ...stribute_parallel_for_simd_schedule_codegen.cpp | 72 ++++++++--------
 .../OpenMP/target_teams_generic_loop_codegen-1.cpp | 30 +++----
 .../OpenMP/target_teams_generic_loop_codegen.cpp   | 12 +--
 .../target_teams_generic_loop_collapse_codegen.cpp | 20 ++---
 .../target_teams_generic_loop_if_codegen.cpp       | 18 ++--
 .../target_teams_generic_loop_order_codegen.cpp    |  4 +-
 .../target_teams_generic_loop_private_codegen.cpp  | 32 ++++----
 ...target_teams_generic_loop_reduction_codegen.cpp | 10 +--
 ..._teams_generic_loop_uses_allocators_codegen.cpp |  4 +-
 .../teams_distribute_parallel_for_codegen.cpp      | 28 +++----
 ...ms_distribute_parallel_for_collapse_codegen.cpp | 12 +--
 ...eams_distribute_parallel_for_copyin_codegen.cpp | 10 +--
 ...stribute_parallel_for_dist_schedule_codegen.cpp | 36 ++++----
 ...istribute_parallel_for_firstprivate_codegen.cpp | 10 +--
 .../teams_distribute_parallel_for_if_codegen.cpp   | 16 ++--
 ...distribute_parallel_for_lastprivate_codegen.cpp | 12 +--
 ...distribute_parallel_for_num_threads_codegen.cpp | 24 +++---
 ...ams_distribute_parallel_for_private_codegen.cpp | 10 +--
 ...s_distribute_parallel_for_proc_bind_codegen.cpp |  6 +-
 ...s_distribute_parallel_for_reduction_codegen.cpp | 10 +--
 ...tribute_parallel_for_reduction_task_codegen.cpp | 38 ++++-----
 ...ms_distribute_parallel_for_schedule_codegen.cpp | 72 ++++++++--------
 .../teams_distribute_parallel_for_simd_codegen.cpp | 24 +++---
 ...stribute_parallel_for_simd_collapse_codegen.cpp | 12 +--
 ...ute_parallel_for_simd_dist_schedule_codegen.cpp | 36 ++++----
 ...bute_parallel_for_simd_firstprivate_codegen.cpp | 10 +--
 ...ams_distribute_parallel_for_simd_if_codegen.cpp | 68 +++++++--------
 ...ibute_parallel_for_simd_lastprivate_codegen.cpp | 12 +--
 ...ibute_parallel_for_simd_num_threads_codegen.cpp | 24 +++---
 ...istribute_parallel_for_simd_private_codegen.cpp | 10 +--
 ...tribute_parallel_for_simd_proc_bind_codegen.cpp |  6 +-
 ...tribute_parallel_for_simd_reduction_codegen.cpp | 10 +--
 ...stribute_parallel_for_simd_schedule_codegen.cpp | 72 ++++++++--------
 clang/test/OpenMP/teams_generic_loop_codegen-1.cpp | 40 ++++-----
 clang/test/OpenMP/teams_generic_loop_codegen.cpp   | 24 +++---
 .../OpenMP/teams_generic_loop_collapse_codegen.cpp | 20 ++---
 .../OpenMP/teams_generic_loop_private_codegen.cpp  | 16 ++--
 .../teams_generic_loop_reduction_codegen.cpp       | 16 ++--
 90 files changed, 1019 insertions(+), 1011 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index a7b72df6d9f8..e8a68dbcc687 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2647,6 +2647,9 @@ void CGOpenMPRuntime::emitDistributeStaticInit(
 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           OpenMPDirectiveKind DKind) {
+  assert(DKind == OMPD_distribute || DKind == OMPD_for ||
+         DKind == OMPD_sections &&
+             "Expected distribute, for, or sections directive kind");
   if (!CGF.HaveInsertPoint())
     return;
   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 3fbd2e03eb61..452ce6983f6a 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2910,10 +2910,10 @@ void CodeGenFunction::EmitOMPOuterLoop(
   EmitBlock(LoopExit.getBlock());
 
   // Tell the runtime we are done.
-  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
+  auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) {
     if (!DynamicOrOrdered)
       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
-                                                     S.getDirectiveKind());
+                                                     LoopArgs.DKind);
   };
   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
 }
@@ -3019,6 +3019,7 @@ void CodeGenFunction::EmitOMPForOuterLoop(
   OuterLoopArgs.Cond = S.getCond();
   OuterLoopArgs.NextLB = S.getNextLowerBound();
   OuterLoopArgs.NextUB = S.getNextUpperBound();
+  OuterLoopArgs.DKind = LoopArgs.DKind;
   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
 }
@@ -3080,6 +3081,7 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                              ? S.getCombinedNextUpperBound()
                              : S.getNextUpperBound();
+  OuterLoopArgs.DKind = OMPD_distribute;
 
   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
@@ -3452,15 +3454,16 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
         // Tell the runtime we are done.
         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
-                                                         S.getDirectiveKind());
+                                                         OMPD_for);
         };
         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
       } else {
         // Emit the outer loop, which requests its work chunk [LB..UB] from
         // runtime and runs the inner loop to process it.
-        const OMPLoopArguments LoopArguments(
+        OMPLoopArguments LoopArguments(
             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
             IL.getAddress(*this), Chunk, EUB);
+        LoopArguments.DKind = OMPD_for;
         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                             LoopArguments, CGDispatchBounds);
       }
@@ -4082,7 +4085,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
     // Tell the runtime we are done.
     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
-                                                     S.getDirectiveKind());
+                                                     OMPD_sections);
     };
     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
@@ -5782,7 +5785,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
             });
         EmitBlock(LoopExit.getBlock());
         // Tell the runtime we are done.
-        RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
+        RT.emitForStaticFinish(*this, S.getEndLoc(), OMPD_distribute);
       } else {
         // Emit the outer loop, which requests its work chunk [LB..UB] from
         // runtime and runs the inner loop to process it.
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 6c825a302913..e8f8aa601ed0 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3831,6 +3831,8 @@ private:
     Expr *NextLB = nullptr;
     /// Update of UB after a whole chunk has been executed
     Expr *NextUB = nullptr;
+    /// Distinguish between the for distribute and sections
+    OpenMPDirectiveKind DKind = llvm::omp::OMPD_unknown;
     OMPLoopArguments() = default;
     OMPLoopArguments(Address LB, Address UB, Address ST, Address IL,
                      llvm::Value *Chunk = nullptr, Expr *EUB = nullptr,
diff --git a/clang/test/OpenMP/amdgcn_target_device_vla.cpp b/clang/test/OpenMP/amdgcn_target_device_vla.cpp
index de150a0fcb4a..58fef517a9e7 100644
--- a/clang/test/OpenMP/amdgcn_target_device_vla.cpp
+++ b/clang/test/OpenMP/amdgcn_target_device_vla.cpp
@@ -539,7 +539,7 @@ int main() {
 // CHECK:       omp.loop.exit:
 // CHECK-NEXT:    [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
-// CHECK-NEXT:    call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP35]])
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP35]])
 // CHECK-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK:       omp.precond.end:
 // CHECK-NEXT:    ret void
diff --git a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c
index dd33e8405c34..cc0cc0def48b 100644
--- a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c
+++ b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c
@@ -301,7 +301,7 @@ void write_to_aligned_array(int *a, int N) {
 // CHECK-AMD:       omp.loop.exit:
 // CHECK-AMD-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
 // CHECK-AMD-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK-AMD-NEXT:    call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP18]])
+// CHECK-AMD-NEXT:    call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP18]])
 // CHECK-AMD-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK-AMD:       omp.precond.end:
 // CHECK-AMD-NEXT:    ret void
diff --git a/clang/test/OpenMP/bug60602.cpp b/clang/test/OpenMP/bug60602.cpp
index 3ecc70cab778..48dc341a0822 100644
--- a/clang/test/OpenMP/bug60602.cpp
+++ b/clang/test/OpenMP/bug60602.cpp
@@ -564,7 +564,7 @@ int kernel_within_loop(int *a, int *b, int N, int num_iters) {
 // CHECK:       omp.loop.exit:
 // CHECK-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP23]])
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]])
 // CHECK-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK:       omp.precond.end:
 // CHECK-NEXT:    ret void
diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
index 95adefa8020f..9cb0a1553065 100644
--- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
@@ -1027,7 +1027,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -1266,7 +1266,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -1535,7 +1535,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -1774,7 +1774,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -2047,7 +2047,7 @@ int main() {
 // CHECK1:       omp.dispatch.end:
 // CHECK1-NEXT:    [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -2791,7 +2791,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
@@ -3023,7 +3023,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
@@ -3285,7 +3285,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
@@ -3517,7 +3517,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
@@ -3781,7 +3781,7 @@ int main() {
 // CHECK3:       omp.dispatch.end:
 // CHECK3-NEXT:    [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
@@ -5108,7 +5108,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -5337,7 +5337,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -5596,7 +5596,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -5825,7 +5825,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -6088,7 +6088,7 @@ int main() {
 // CHECK9:       omp.dispatch.end:
 // CHECK9-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP37]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -7414,12 +7414,12 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       cancel.exit:
 // CHECK9-NEXT:    [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP36]])
 // CHECK9-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    br label [[CANCEL_CONT]]
@@ -7650,7 +7650,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -7909,7 +7909,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -8138,7 +8138,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -8401,7 +8401,7 @@ int main() {
 // CHECK9:       omp.dispatch.end:
 // CHECK9-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP37]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -9715,7 +9715,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -9937,7 +9937,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -10189,7 +10189,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -10411,7 +10411,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -10665,7 +10665,7 @@ int main() {
 // CHECK11:       omp.dispatch.end:
 // CHECK11-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP37]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -11970,12 +11970,12 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       cancel.exit:
 // CHECK11-NEXT:    [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP36]])
 // CHECK11-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    br label [[CANCEL_CONT]]
@@ -12199,7 +12199,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -12451,7 +12451,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -12673,7 +12673,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -12927,7 +12927,7 @@ int main() {
 // CHECK11:       omp.dispatch.end:
 // CHECK11-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP37]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
diff --git a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
index 46c115e40e43..6084a9a6cf93 100644
--- a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
@@ -500,7 +500,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -748,7 +748,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1167,7 +1167,7 @@ int main() {
 // CHECK8:       omp.loop.exit:
 // CHECK8-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK8-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK8-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]]
 // CHECK8-NEXT:    [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0
 // CHECK8-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2
@@ -1612,7 +1612,7 @@ int main() {
 // CHECK8:       omp.loop.exit:
 // CHECK8-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK8-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK8-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]]
 // CHECK8-NEXT:    [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0
 // CHECK8-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2
@@ -2080,7 +2080,7 @@ int main() {
 // CHECK10:       omp.loop.exit:
 // CHECK10-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK10-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK10-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]]
 // CHECK10-NEXT:    [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2
@@ -2519,7 +2519,7 @@ int main() {
 // CHECK10:       omp.loop.exit:
 // CHECK10-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK10-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK10-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]]
 // CHECK10-NEXT:    [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2
diff --git a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
index 846e7beb5d92..d3b6654e57e2 100644
--- a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
@@ -329,7 +329,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -472,7 +472,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -740,7 +740,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -883,7 +883,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1040,7 +1040,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1306,7 +1306,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1449,7 +1449,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1606,6 +1606,6 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
index aa981f606cc8..1f069c4070ae 100644
--- a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
@@ -443,7 +443,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]])
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -708,7 +708,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]])
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -1153,7 +1153,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK9-NEXT:    br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -1636,7 +1636,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK9-NEXT:    br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -2139,7 +2139,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -2616,7 +2616,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK11-NEXT:    br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
diff --git a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
index 5d9244268d55..b6ae783b74d0 100644
--- a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
@@ -386,7 +386,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 // CHECK1:       terminate.lpad:
 // CHECK1-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -547,7 +547,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 // CHECK1:       terminate.lpad:
 // CHECK1-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -879,7 +879,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 // CHECK1:       terminate.lpad:
 // CHECK1-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -1026,7 +1026,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 // CHECK1:       terminate.lpad:
 // CHECK1-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -1173,7 +1173,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 // CHECK1:       terminate.lpad:
 // CHECK1-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -1335,7 +1335,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 // CHECK1:       terminate.lpad:
 // CHECK1-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -1638,7 +1638,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 // CHECK5:       terminate.lpad:
 // CHECK5-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -1799,7 +1799,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 // CHECK5:       terminate.lpad:
 // CHECK5-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -2122,7 +2122,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 // CHECK5:       terminate.lpad:
 // CHECK5-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -2269,7 +2269,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 // CHECK5:       terminate.lpad:
 // CHECK5-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -2416,7 +2416,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 // CHECK5:       terminate.lpad:
 // CHECK5-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -2578,7 +2578,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 // CHECK5:       terminate.lpad:
 // CHECK5-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -2890,7 +2890,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    ret void
 // CHECK9:       terminate.lpad:
 // CHECK9-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -3051,7 +3051,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    ret void
 // CHECK9:       terminate.lpad:
 // CHECK9-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -3383,7 +3383,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    ret void
 // CHECK9:       terminate.lpad:
 // CHECK9-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -3530,7 +3530,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    ret void
 // CHECK9:       terminate.lpad:
 // CHECK9-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -3677,7 +3677,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    ret void
 // CHECK9:       terminate.lpad:
 // CHECK9-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -3839,7 +3839,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    ret void
 // CHECK9:       terminate.lpad:
 // CHECK9-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -4142,7 +4142,7 @@ int main() {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK13-NEXT:    ret void
 // CHECK13:       terminate.lpad:
 // CHECK13-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -4303,7 +4303,7 @@ int main() {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK13-NEXT:    ret void
 // CHECK13:       terminate.lpad:
 // CHECK13-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -4626,7 +4626,7 @@ int main() {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK13-NEXT:    ret void
 // CHECK13:       terminate.lpad:
 // CHECK13-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -4773,7 +4773,7 @@ int main() {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK13-NEXT:    ret void
 // CHECK13:       terminate.lpad:
 // CHECK13-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -4920,7 +4920,7 @@ int main() {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK13-NEXT:    ret void
 // CHECK13:       terminate.lpad:
 // CHECK13-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -5082,7 +5082,7 @@ int main() {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK13-NEXT:    ret void
 // CHECK13:       terminate.lpad:
 // CHECK13-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
diff --git a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
index 249609c7d831..e2b0d64093eb 100644
--- a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
@@ -313,7 +313,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -491,7 +491,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -795,7 +795,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK9-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]]
 // CHECK9-NEXT:    [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2
@@ -1147,7 +1147,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK9-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]]
 // CHECK9-NEXT:    [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2
@@ -1500,7 +1500,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK11-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]]
 // CHECK11-NEXT:    [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2
@@ -1846,7 +1846,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK11-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]]
 // CHECK11-NEXT:    [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
diff --git a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
index 8784611d8399..040f90b9ef78 100644
--- a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
@@ -266,7 +266,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -404,7 +404,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -583,6 +583,6 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
index 7caca83c25d6..a019f09c14cb 100644
--- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
@@ -328,7 +328,7 @@ int main(int argc, char **argv) {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP79]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP79]])
 // CHECK1-NEXT:    [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4
 // CHECK1-NEXT:    call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB2]], i32 [[TMP81]], i32 1)
@@ -343,8 +343,8 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4
 // CHECK1-NEXT:    [[TMP88:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP87]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l14.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
 // CHECK1-NEXT:    switch i32 [[TMP88]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK1-NEXT:      i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK1-NEXT:      i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // CHECK1-NEXT:    ]
 // CHECK1:       .omp.reduction.case1:
 // CHECK1-NEXT:    [[TMP89:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -536,21 +536,21 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
-// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META12:![0-9]+]]
+// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META12]]
 // CHECK1-NEXT:    call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR2]]
-// CHECK1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META12]]
 // CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8
-// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META12]]
 // CHECK1-NEXT:    [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]])
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
@@ -570,7 +570,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
 // CHECK1-NEXT:    [[TMP31:%.*]] = add nuw i64 [[TMP30]], 1
 // CHECK1-NEXT:    [[TMP32:%.*]] = mul nuw i64 [[TMP31]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT:    store i64 [[TMP31]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12
+// CHECK1-NEXT:    store i64 [[TMP31]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias [[META12]]
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP33]], ptr [[TMP20]])
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
@@ -580,8 +580,8 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP39:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP40:%.*]] = sub i64 [[TMP38]], [[TMP39]]
 // CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr i8, ptr [[TMP34]], i64 [[TMP40]]
-// CHECK1-NEXT:    store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP41]], ptr [[TMP4_I]], align 8, !noalias !12
+// CHECK1-NEXT:    store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr [[TMP41]], ptr [[TMP4_I]], align 8, !noalias [[META12]]
 // CHECK1-NEXT:    ret i32 0
 //
 //
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
index e0618cb99245..77336877e2be 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
@@ -1039,7 +1039,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
 // CHECK1-NEXT:    br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1302,7 +1302,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
 // CHECK1-NEXT:    br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1595,7 +1595,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
 // CHECK1-NEXT:    br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1858,7 +1858,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
 // CHECK1-NEXT:    br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2155,7 +2155,7 @@ int main() {
 // CHECK1:       omp.dispatch.end:
 // CHECK1-NEXT:    [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
 // CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK1-NEXT:    br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2971,7 +2971,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK3-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
 // CHECK3-NEXT:    br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3227,7 +3227,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK3-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
 // CHECK3-NEXT:    br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3513,7 +3513,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK3-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
 // CHECK3-NEXT:    br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3769,7 +3769,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK3-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
 // CHECK3-NEXT:    br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4057,7 +4057,7 @@ int main() {
 // CHECK3:       omp.dispatch.end:
 // CHECK3-NEXT:    [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
 // CHECK3-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK3-NEXT:    br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5510,7 +5510,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK9-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5763,7 +5763,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK9-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6046,7 +6046,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK9-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6299,7 +6299,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK9-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6586,7 +6586,7 @@ int main() {
 // CHECK9:       omp.dispatch.end:
 // CHECK9-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP37]])
 // CHECK9-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0
 // CHECK9-NEXT:    br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7976,7 +7976,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK9-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -8229,7 +8229,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK9-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -8512,7 +8512,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK9-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -8765,7 +8765,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK9-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK9-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -9052,7 +9052,7 @@ int main() {
 // CHECK9:       omp.dispatch.end:
 // CHECK9-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP37]])
 // CHECK9-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0
 // CHECK9-NEXT:    br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -10438,7 +10438,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK11-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -10684,7 +10684,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK11-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -10960,7 +10960,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK11-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -11206,7 +11206,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK11-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -11484,7 +11484,7 @@ int main() {
 // CHECK11:       omp.dispatch.end:
 // CHECK11-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP37]])
 // CHECK11-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0
 // CHECK11-NEXT:    br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -12853,7 +12853,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK11-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -13099,7 +13099,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK11-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -13375,7 +13375,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK11-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -13621,7 +13621,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK11-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
 // CHECK11-NEXT:    br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -13899,7 +13899,7 @@ int main() {
 // CHECK11:       omp.dispatch.end:
 // CHECK11-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP37]])
 // CHECK11-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0
 // CHECK11-NEXT:    br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
index 5c9b2aa1f47f..f47b64b1e299 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -506,7 +506,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
 // CHECK1-NEXT:    br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -768,7 +768,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK3-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
 // CHECK3-NEXT:    br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1237,7 +1237,7 @@ int main() {
 // CHECK8:       omp.loop.exit:
 // CHECK8-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK8-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK8-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK8-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK8-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1696,7 +1696,7 @@ int main() {
 // CHECK8:       omp.loop.exit:
 // CHECK8-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK8-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK8-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK8-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK8-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2178,7 +2178,7 @@ int main() {
 // CHECK10:       omp.loop.exit:
 // CHECK10-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK10-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK10-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK10-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK10-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2631,7 +2631,7 @@ int main() {
 // CHECK10:       omp.loop.exit:
 // CHECK10-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK10-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK10-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK10-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK10-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
index 67384abc7751..b3e964fddcf0 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
@@ -333,7 +333,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -490,7 +490,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -772,7 +772,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -929,7 +929,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1100,7 +1100,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1380,7 +1380,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1537,7 +1537,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1708,7 +1708,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1935,7 +1935,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2092,7 +2092,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2374,7 +2374,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2531,7 +2531,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2810,7 +2810,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK3-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK3-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2937,7 +2937,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK3-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK3-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3217,7 +3217,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3374,7 +3374,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3545,7 +3545,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4348,7 +4348,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4505,7 +4505,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4787,7 +4787,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4944,7 +4944,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5115,7 +5115,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5395,7 +5395,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5552,7 +5552,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5723,7 +5723,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5950,7 +5950,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6107,7 +6107,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6389,7 +6389,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6546,7 +6546,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6825,7 +6825,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6952,7 +6952,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7232,7 +7232,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7389,7 +7389,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7560,7 +7560,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
index adef55eee1cd..7656fb7bc2c5 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -453,7 +453,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]])
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -732,7 +732,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]])
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1227,7 +1227,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK9-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1724,7 +1724,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK9-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2241,7 +2241,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2732,7 +2732,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK11-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
index 0a0ed699acb1..9f4fffbea63d 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
@@ -393,7 +393,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -568,7 +568,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -914,7 +914,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1075,7 +1075,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1236,7 +1236,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1412,7 +1412,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2068,7 +2068,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK5-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2243,7 +2243,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK5-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2580,7 +2580,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK5-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2741,7 +2741,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK5-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2902,7 +2902,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK5-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3078,7 +3078,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK5-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3404,7 +3404,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3579,7 +3579,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3925,7 +3925,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4086,7 +4086,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4247,7 +4247,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4423,7 +4423,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5079,7 +5079,7 @@ int main() {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK13-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK13-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5254,7 +5254,7 @@ int main() {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK13-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK13-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5591,7 +5591,7 @@ int main() {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK13-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK13-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5752,7 +5752,7 @@ int main() {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK13-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK13-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5913,7 +5913,7 @@ int main() {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK13-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK13-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6089,7 +6089,7 @@ int main() {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK13-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK13-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
index 22208e2e2c1d..61f35d8b456c 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
@@ -320,7 +320,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
 // CHECK1-NEXT:    br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -512,7 +512,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
 // CHECK3-NEXT:    br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -856,7 +856,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK9-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
 // CHECK9-NEXT:    br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1222,7 +1222,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK9-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
 // CHECK9-NEXT:    br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1589,7 +1589,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK11-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
 // CHECK11-NEXT:    br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1949,7 +1949,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK11-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
 // CHECK11-NEXT:    br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
index d452ac3bed48..334965f59a82 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
@@ -273,7 +273,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -425,7 +425,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -618,7 +618,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/metadirective_device_arch_codegen.cpp b/clang/test/OpenMP/metadirective_device_arch_codegen.cpp
index 6150b7c07c16..eecae310d0a7 100644
--- a/clang/test/OpenMP/metadirective_device_arch_codegen.cpp
+++ b/clang/test/OpenMP/metadirective_device_arch_codegen.cpp
@@ -60,6 +60,6 @@ int metadirective1() {
 // CHECK: omp.inner.for.body:
 // CHECK: store atomic {{.*}} monotonic
 // CHECK: omp.loop.exit:
-// CHECK-NEXT: call void @__kmpc_distribute_static_fini
+// CHECK-NEXT: call void @__kmpc_for_static_fini
 // CHECK-NEXT: ret void
 
diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp
index 1ac545499427..d47025cd1aca 100644
--- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp
@@ -573,7 +573,7 @@ int a;
 // CHECK-64:       omp.loop.exit:
 // CHECK-64-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK-64-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
 // CHECK-64-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-64-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK-64-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -680,7 +680,7 @@ int a;
 // CHECK-64:       omp.loop.exit:
 // CHECK-64-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK-64-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
 // CHECK-64-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-64-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK-64-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -873,7 +873,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-64-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK-64-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1057,7 +1057,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-64-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
 // CHECK-64-NEXT:    br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2028,7 +2028,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-64-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK-64-NEXT:    br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -2215,7 +2215,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    ret void
 //
 //
@@ -2385,7 +2385,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    ret void
 //
 //
@@ -3271,7 +3271,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-64-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
 // CHECK-64-NEXT:    br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3466,7 +3466,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-64-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK-64-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3634,7 +3634,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-64-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
 // CHECK-64-NEXT:    br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4595,7 +4595,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    ret void
 //
 //
@@ -4774,7 +4774,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    ret void
 //
 //
@@ -4944,7 +4944,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    ret void
 //
 //
@@ -5822,7 +5822,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    ret void
 //
 //
@@ -6001,7 +6001,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    ret void
 //
 //
@@ -6171,7 +6171,7 @@ int a;
 // CHECK-64:       omp.inner.for.end:
 // CHECK-64-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-64:       omp.loop.exit:
-// CHECK-64-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-64-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-64-NEXT:    ret void
 //
 //
@@ -9534,7 +9534,7 @@ int a;
 // CHECK-32:       omp.loop.exit:
 // CHECK-32-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK-32-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
 // CHECK-32-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK-32-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -9637,7 +9637,7 @@ int a;
 // CHECK-32:       omp.loop.exit:
 // CHECK-32-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK-32-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
 // CHECK-32-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK-32-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -9826,7 +9826,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK-32-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -10005,7 +10005,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
 // CHECK-32-NEXT:    br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -10955,7 +10955,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK-32-NEXT:    br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -11138,7 +11138,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    ret void
 //
 //
@@ -11303,7 +11303,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    ret void
 //
 //
@@ -12168,7 +12168,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
 // CHECK-32-NEXT:    br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -12359,7 +12359,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK-32-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -12522,7 +12522,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
 // CHECK-32-NEXT:    br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -13462,7 +13462,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    ret void
 //
 //
@@ -13637,7 +13637,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    ret void
 //
 //
@@ -13802,7 +13802,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    ret void
 //
 //
@@ -14659,7 +14659,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    ret void
 //
 //
@@ -14834,7 +14834,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    ret void
 //
 //
@@ -14999,7 +14999,7 @@ int a;
 // CHECK-32:       omp.inner.for.end:
 // CHECK-32-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32:       omp.loop.exit:
-// CHECK-32-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-NEXT:    ret void
 //
 //
@@ -18346,7 +18346,7 @@ int a;
 // CHECK-32-EX:       omp.loop.exit:
 // CHECK-32-EX-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK-32-EX-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
 // CHECK-32-EX-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-EX-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK-32-EX-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -18449,7 +18449,7 @@ int a;
 // CHECK-32-EX:       omp.loop.exit:
 // CHECK-32-EX-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK-32-EX-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
 // CHECK-32-EX-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-EX-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK-32-EX-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -18638,7 +18638,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-EX-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK-32-EX-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -18817,7 +18817,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-EX-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
 // CHECK-32-EX-NEXT:    br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -19767,7 +19767,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-EX-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK-32-EX-NEXT:    br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -19950,7 +19950,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    ret void
 //
 //
@@ -20115,7 +20115,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    ret void
 //
 //
@@ -20980,7 +20980,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-EX-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
 // CHECK-32-EX-NEXT:    br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -21171,7 +21171,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-EX-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK-32-EX-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -21334,7 +21334,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-32-EX-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
 // CHECK-32-EX-NEXT:    br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -22274,7 +22274,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    ret void
 //
 //
@@ -22449,7 +22449,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    ret void
 //
 //
@@ -22614,7 +22614,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    ret void
 //
 //
@@ -23471,7 +23471,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    ret void
 //
 //
@@ -23646,7 +23646,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    ret void
 //
 //
@@ -23811,7 +23811,7 @@ int a;
 // CHECK-32-EX:       omp.inner.for.end:
 // CHECK-32-EX-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK-32-EX:       omp.loop.exit:
-// CHECK-32-EX-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK-32-EX-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK-32-EX-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
index 7402698af3e4..986aeadaf998 100644
--- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
@@ -329,7 +329,7 @@ int main(int argc, char **argv) {
 // CHECK4:       omp.loop.exit:
 // CHECK4-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK4-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// CHECK4-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP23]])
+// CHECK4-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP23]])
 // CHECK4-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK4-NEXT:    [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
 // CHECK4-NEXT:    br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -639,7 +639,7 @@ int main(int argc, char **argv) {
 // CHECK5:       omp.loop.exit:
 // CHECK5-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK5-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// CHECK5-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP23]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP23]])
 // CHECK5-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
 // CHECK5-NEXT:    br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
index 4d6982d10616..8397b93cbeed 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -371,7 +371,7 @@ int bar(int n){
 // CHECK1:       omp.dispatch.end:
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
-// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP27]])
 // CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
 // CHECK1-NEXT:    br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -633,7 +633,7 @@ int bar(int n){
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -822,7 +822,7 @@ int bar(int n){
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1050,7 +1050,7 @@ int bar(int n){
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1360,7 +1360,7 @@ int bar(int n){
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP28]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -1625,7 +1625,7 @@ int bar(int n){
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -1931,7 +1931,7 @@ int bar(int n){
 // CHECK2:       omp.dispatch.end:
 // CHECK2-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
-// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP27]])
 // CHECK2-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
 // CHECK2-NEXT:    br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -2193,7 +2193,7 @@ int bar(int n){
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    ret void
@@ -2382,7 +2382,7 @@ int bar(int n){
 // CHECK2:       omp.inner.for.end:
 // CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK2:       omp.loop.exit:
-// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK2-NEXT:    ret void
 //
 //
@@ -2610,7 +2610,7 @@ int bar(int n){
 // CHECK2:       omp.inner.for.end:
 // CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK2:       omp.loop.exit:
-// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK2-NEXT:    ret void
 //
 //
@@ -2915,7 +2915,7 @@ int bar(int n){
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP28]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    ret void
@@ -3180,7 +3180,7 @@ int bar(int n){
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    ret void
@@ -3479,7 +3479,7 @@ int bar(int n){
 // CHECK3:       omp.dispatch.end:
 // CHECK3-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
-// CHECK3-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP27]])
 // CHECK3-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
 // CHECK3-NEXT:    br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -3735,7 +3735,7 @@ int bar(int n){
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK3-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
@@ -3918,7 +3918,7 @@ int bar(int n){
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -4139,7 +4139,7 @@ int bar(int n){
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -4452,7 +4452,7 @@ int bar(int n){
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK3-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP28]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
@@ -4710,7 +4710,7 @@ int bar(int n){
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK3-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
index 045cd39b07b7..e687a537ecf1 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
@@ -323,7 +323,7 @@ int main(int argc, char **argv) {
 // CHECK1:       omp.dispatch.end:
 // CHECK1-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP26]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -617,7 +617,7 @@ int main(int argc, char **argv) {
 // CHECK2:       omp.dispatch.end:
 // CHECK2-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK2-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP26]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    ret void
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
index 2520713da50e..9aee4bd09282 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -371,7 +371,7 @@ int bar(int n){
 // CHECK1:       omp.dispatch.end:
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
-// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP27]])
 // CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
 // CHECK1-NEXT:    br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -657,7 +657,7 @@ int bar(int n){
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
 // CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
 // CHECK1-NEXT:    br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -865,7 +865,7 @@ int bar(int n){
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1108,7 +1108,7 @@ int bar(int n){
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
 // CHECK1-NEXT:    br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1425,7 +1425,7 @@ int bar(int n){
 // CHECK2:       omp.dispatch.end:
 // CHECK2-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
-// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP27]])
 // CHECK2-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
 // CHECK2-NEXT:    br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1705,7 +1705,7 @@ int bar(int n){
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
 // CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
 // CHECK2-NEXT:    br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1907,7 +1907,7 @@ int bar(int n){
 // CHECK2:       omp.inner.for.end:
 // CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK2:       omp.loop.exit:
-// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK2-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2143,7 +2143,7 @@ int bar(int n){
 // CHECK2:       omp.inner.for.end:
 // CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK2:       omp.loop.exit:
-// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
 // CHECK2-NEXT:    br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
index fc83500a09f9..5226b7498e4c 100644
--- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
@@ -219,7 +219,7 @@ int bar(int n){
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP41]])
+// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -276,7 +276,7 @@ int bar(int n){
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK1-NEXT:    store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
@@ -432,7 +432,7 @@ int bar(int n){
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]])
+// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -637,7 +637,7 @@ int bar(int n){
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]])
+// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -904,7 +904,7 @@ int bar(int n){
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP42]])
+// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP42]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -1208,7 +1208,7 @@ int bar(int n){
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP43]])
+// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -1465,7 +1465,7 @@ int bar(int n){
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP41]])
+// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    ret void
@@ -1522,7 +1522,7 @@ int bar(int n){
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK2-NEXT:    store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
@@ -1678,7 +1678,7 @@ int bar(int n){
 // CHECK2:       omp.inner.for.end:
 // CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK2:       omp.loop.exit:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]])
+// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1883,7 +1883,7 @@ int bar(int n){
 // CHECK2:       omp.inner.for.end:
 // CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK2:       omp.loop.exit:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]])
+// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
 // CHECK2-NEXT:    ret void
 //
 //
@@ -2149,7 +2149,7 @@ int bar(int n){
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP44]])
+// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    ret void
@@ -2449,7 +2449,7 @@ int bar(int n){
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP43]])
+// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    ret void
@@ -2704,7 +2704,7 @@ int bar(int n){
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP39]])
+// CHECK3-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP39]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
@@ -2759,7 +2759,7 @@ int bar(int n){
 // CHECK3-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK3-NEXT:    store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
@@ -2911,7 +2911,7 @@ int bar(int n){
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]])
+// CHECK3-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -3110,7 +3110,7 @@ int bar(int n){
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]])
+// CHECK3-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -3374,7 +3374,7 @@ int bar(int n){
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP44]])
+// CHECK3-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
@@ -3677,7 +3677,7 @@ int bar(int n){
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP41]])
+// CHECK3-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
index ef26c9b1003a..ca2670f0cd64 100644
--- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
@@ -183,7 +183,7 @@ int main(int argc, char **argv) {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP41]])
+// CHECK1-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -240,7 +240,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK1-NEXT:    store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
@@ -433,7 +433,7 @@ int main(int argc, char **argv) {
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP39]])
+// CHECK2-NEXT:    call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP39]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    ret void
@@ -488,7 +488,7 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK2-NEXT:    store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp
index d47c6ec7214d..7305c56289e0 100644
--- a/clang/test/OpenMP/reduction_implicit_map.cpp
+++ b/clang/test/OpenMP/reduction_implicit_map.cpp
@@ -1365,7 +1365,7 @@ int main()
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]])
 // CHECK2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
 // CHECK2-NEXT:    store ptr [[OUTPUT3]], ptr [[TMP31]], align 4
 // CHECK2-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
@@ -1735,7 +1735,7 @@ int main()
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK2-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]])
 // CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
 // CHECK2-NEXT:    store ptr [[OUTPUT4]], ptr [[TMP33]], align 4
 // CHECK2-NEXT:    [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
diff --git a/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp b/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp
index a8b241c17d24..e8b074a9d5f8 100644
--- a/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp
+++ b/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp
@@ -854,7 +854,7 @@ int bar(int n){
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP18]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
 // CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
 // CHECK1-NEXT:    br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1732,7 +1732,7 @@ int bar(int n){
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP18]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
 // CHECK3-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
 // CHECK3-NEXT:    br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2145,7 +2145,7 @@ int bar(int n){
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP18]])
 // CHECK9-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
 // CHECK9-NEXT:    br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2508,7 +2508,7 @@ int bar(int n){
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP18]])
 // CHECK11-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
 // CHECK11-NEXT:    br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
index 9e12880be298..7e06d8c16169 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
@@ -333,12 +333,12 @@ int target_teams_fun(int *g){
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       cancel.exit:
 // CHECK1-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP26]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    br label [[CANCEL_CONT]]
@@ -559,7 +559,7 @@ int target_teams_fun(int *g){
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -975,12 +975,12 @@ int target_teams_fun(int *g){
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       cancel.exit:
 // CHECK2-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP26]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    br label [[CANCEL_CONT]]
@@ -1201,7 +1201,7 @@ int target_teams_fun(int *g){
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    ret void
@@ -1612,12 +1612,12 @@ int target_teams_fun(int *g){
 // CHECK4:       omp.loop.exit:
 // CHECK4-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK4-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK4-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
+// CHECK4-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]])
 // CHECK4-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK4:       cancel.exit:
 // CHECK4-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK4-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK4-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// CHECK4-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP26]])
 // CHECK4-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK4:       omp.precond.end:
 // CHECK4-NEXT:    br label [[CANCEL_CONT]]
@@ -1833,7 +1833,7 @@ int target_teams_fun(int *g){
 // CHECK4:       omp.loop.exit:
 // CHECK4-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK4-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK4-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK4-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK4-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK4:       omp.precond.end:
 // CHECK4-NEXT:    ret void
@@ -2059,12 +2059,12 @@ int target_teams_fun(int *g){
 // CHECK10:       omp.loop.exit:
 // CHECK10-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK10-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]])
 // CHECK10-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK10:       cancel.exit:
 // CHECK10-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK10-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP26]])
 // CHECK10-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK10:       omp.precond.end:
 // CHECK10-NEXT:    br label [[CANCEL_CONT]]
@@ -2287,7 +2287,7 @@ int target_teams_fun(int *g){
 // CHECK10:       omp.loop.exit:
 // CHECK10-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK10-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK10-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK10:       omp.precond.end:
 // CHECK10-NEXT:    ret void
@@ -2508,12 +2508,12 @@ int target_teams_fun(int *g){
 // CHECK12:       omp.loop.exit:
 // CHECK12-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK12-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK12-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
+// CHECK12-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]])
 // CHECK12-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK12:       cancel.exit:
 // CHECK12-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK12-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK12-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// CHECK12-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP26]])
 // CHECK12-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK12:       omp.precond.end:
 // CHECK12-NEXT:    br label [[CANCEL_CONT]]
@@ -2731,7 +2731,7 @@ int target_teams_fun(int *g){
 // CHECK12:       omp.loop.exit:
 // CHECK12-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK12-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK12-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK12-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK12-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK12:       omp.precond.end:
 // CHECK12-NEXT:    ret void
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
index d13920ba956f..b812011ea4ca 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
@@ -331,7 +331,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -561,7 +561,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1000,7 +1000,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -1224,7 +1224,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    ret void
 //
 //
@@ -1664,7 +1664,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -1882,6 +1882,6 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
index 517ea936a1d6..0aa75952a3c2 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
@@ -442,7 +442,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -593,7 +593,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -764,7 +764,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1071,7 +1071,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1217,7 +1217,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1383,7 +1383,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1881,7 +1881,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -2098,7 +2098,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -2354,7 +2354,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -2668,7 +2668,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    ret void
 //
 //
@@ -2818,7 +2818,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    ret void
 //
 //
@@ -3003,7 +3003,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    ret void
 //
 //
@@ -3498,7 +3498,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -3710,7 +3710,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -3961,7 +3961,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -4270,7 +4270,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    ret void
 //
 //
@@ -4415,7 +4415,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    ret void
 //
 //
@@ -4595,6 +4595,6 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
index 9f11929ec372..375279d96360 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
@@ -703,7 +703,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK1-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]]
 // CHECK1-NEXT:    [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2
@@ -1170,7 +1170,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK1-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR2]]
 // CHECK1-NEXT:    [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2
@@ -1759,7 +1759,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK3-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2
@@ -2220,7 +2220,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK3-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2
@@ -2631,7 +2631,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 //
 //
@@ -2949,7 +2949,7 @@ int main() {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK13-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]]
 // CHECK13-NEXT:    [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2
@@ -3256,7 +3256,7 @@ int main() {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK13-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]]
 // CHECK13-NEXT:    [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2
@@ -3663,7 +3663,7 @@ int main() {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK15-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]]
 // CHECK15-NEXT:    [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2
@@ -3964,7 +3964,7 @@ int main() {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK15-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]]
 // CHECK15-NEXT:    [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2
@@ -4270,6 +4270,6 @@ int main() {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK17-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
index fe2088aace15..9baf13385bef 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
@@ -314,7 +314,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -457,7 +457,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -711,7 +711,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -854,7 +854,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1016,7 +1016,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1259,7 +1259,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1402,7 +1402,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1564,6 +1564,6 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
index e2181e5088cc..e54c230eea7e 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
@@ -428,7 +428,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
 // CHECK1-NEXT:    br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -692,7 +692,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP6]])
 // CHECK3-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
 // CHECK3-NEXT:    br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -1144,7 +1144,7 @@ int main() {
 // CHECK5:       omp.loop.exit:
 // CHECK5-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK5-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
 // CHECK5-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK5-NEXT:    br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -1628,7 +1628,7 @@ int main() {
 // CHECK5:       omp.loop.exit:
 // CHECK5-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK5-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
 // CHECK5-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK5-NEXT:    br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -2138,7 +2138,7 @@ int main() {
 // CHECK7:       omp.loop.exit:
 // CHECK7-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK7-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
 // CHECK7-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK7-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK7-NEXT:    br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -2616,7 +2616,7 @@ int main() {
 // CHECK7:       omp.loop.exit:
 // CHECK7-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK7-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
 // CHECK7-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK7-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK7-NEXT:    br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
index 74dee0399f58..feab9cdc948b 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
@@ -195,6 +195,6 @@ void gtid_test() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp
index 82f29fa1d3ef..066d926e5abc 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp
@@ -534,7 +534,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
 // CHECK1-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK1-NEXT:    [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2
@@ -842,7 +842,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK1-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK1-NEXT:    [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2
@@ -1261,7 +1261,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
 // CHECK3-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2
@@ -1563,7 +1563,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK3-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
@@ -1917,7 +1917,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 //
 //
@@ -2139,7 +2139,7 @@ int main() {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
 // CHECK13-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]]
 // CHECK13-NEXT:    [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2
@@ -2376,7 +2376,7 @@ int main() {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK13-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]]
 // CHECK13-NEXT:    [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2
@@ -2647,7 +2647,7 @@ int main() {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
 // CHECK15-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]]
 // CHECK15-NEXT:    [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2
@@ -2878,7 +2878,7 @@ int main() {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK15-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]]
 // CHECK15-NEXT:    [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
@@ -3108,6 +3108,6 @@ int main() {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK17-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp
index 6d22bc22e479..9f3e50fe20a6 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp
@@ -261,7 +261,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -399,7 +399,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -578,6 +578,6 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
index bfa00ee7a0f4..1036ffd195de 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
@@ -316,7 +316,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK1-NEXT:    store ptr [[SIVAR2]], ptr [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -606,7 +606,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK1-NEXT:    store ptr [[T_VAR2]], ptr [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -891,7 +891,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[SIVAR1]], ptr [[TMP14]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -1177,7 +1177,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[T_VAR1]], ptr [[TMP14]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -1425,7 +1425,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK5-NEXT:    store ptr [[SIVAR2]], ptr [[TMP15]], align 8
 // CHECK5-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
index fea36e882b7a..6f1cc4d308a6 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -239,8 +239,8 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4
 // CHECK1-NEXT:    [[TMP73:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP72]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l14.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
 // CHECK1-NEXT:    switch i32 [[TMP73]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK1-NEXT:      i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK1-NEXT:      i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // CHECK1-NEXT:    ]
 // CHECK1:       .omp.reduction.case1:
 // CHECK1-NEXT:    [[TMP74:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -588,7 +588,7 @@ int main(int argc, char **argv) {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP79]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP79]])
 // CHECK1-NEXT:    [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4
 // CHECK1-NEXT:    call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP81]], i32 1)
@@ -603,8 +603,8 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4
 // CHECK1-NEXT:    [[TMP88:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP87]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l14.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
 // CHECK1-NEXT:    switch i32 [[TMP88]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK1-NEXT:      i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK1-NEXT:      i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // CHECK1-NEXT:    ]
 // CHECK1:       .omp.reduction.case1:
 // CHECK1-NEXT:    [[TMP89:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -796,21 +796,21 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
-// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META12:![0-9]+]]
+// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META12]]
 // CHECK1-NEXT:    call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]]
-// CHECK1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META12]]
 // CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8
-// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META12]]
 // CHECK1-NEXT:    [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]])
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
@@ -830,7 +830,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
 // CHECK1-NEXT:    [[TMP31:%.*]] = add nuw i64 [[TMP30]], 1
 // CHECK1-NEXT:    [[TMP32:%.*]] = mul nuw i64 [[TMP31]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT:    store i64 [[TMP31]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12
+// CHECK1-NEXT:    store i64 [[TMP31]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias [[META12]]
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP33]], ptr [[TMP20]])
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
@@ -840,8 +840,8 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP39:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP40:%.*]] = sub i64 [[TMP38]], [[TMP39]]
 // CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr i8, ptr [[TMP34]], i64 [[TMP40]]
-// CHECK1-NEXT:    store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP41]], ptr [[TMP4_I]], align 8, !noalias !12
+// CHECK1-NEXT:    store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr [[TMP41]], ptr [[TMP4_I]], align 8, !noalias [[META12]]
 // CHECK1-NEXT:    ret i32 0
 //
 //
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
index f71a5ca73437..3d031e09aa07 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
@@ -596,7 +596,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -747,7 +747,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -919,7 +919,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK1:       omp.dispatch.end:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1618,7 +1618,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1764,7 +1764,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1929,7 +1929,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK3:       omp.dispatch.end:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -2623,7 +2623,7 @@ int main (int argc, char **argv) {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    ret void
 //
 //
@@ -2774,7 +2774,7 @@ int main (int argc, char **argv) {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    ret void
 //
 //
@@ -2946,7 +2946,7 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK5-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK5:       omp.dispatch.end:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    ret void
 //
 //
@@ -3645,7 +3645,7 @@ int main (int argc, char **argv) {
 // CHECK7:       omp.inner.for.end:
 // CHECK7-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK7:       omp.loop.exit:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK7-NEXT:    ret void
 //
 //
@@ -3791,7 +3791,7 @@ int main (int argc, char **argv) {
 // CHECK7:       omp.inner.for.end:
 // CHECK7-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK7:       omp.loop.exit:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK7-NEXT:    ret void
 //
 //
@@ -3956,7 +3956,7 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK7-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK7:       omp.dispatch.end:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK7-NEXT:    ret void
 //
 //
@@ -4915,7 +4915,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK13-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK13:       omp.precond.end:
 // CHECK13-NEXT:    ret void
@@ -5132,7 +5132,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK13-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK13:       omp.precond.end:
 // CHECK13-NEXT:    ret void
@@ -5388,7 +5388,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK13-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK13:       omp.precond.end:
 // CHECK13-NEXT:    ret void
@@ -6248,7 +6248,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK13-NEXT:    ret void
 //
 //
@@ -6398,7 +6398,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK13-NEXT:    ret void
 //
 //
@@ -6584,7 +6584,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK13-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK13:       omp.dispatch.end:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK13-NEXT:    ret void
 //
 //
@@ -7565,7 +7565,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK15-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK15:       omp.precond.end:
 // CHECK15-NEXT:    ret void
@@ -7777,7 +7777,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK15-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK15:       omp.precond.end:
 // CHECK15-NEXT:    ret void
@@ -8028,7 +8028,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK15-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK15:       omp.precond.end:
 // CHECK15-NEXT:    ret void
@@ -8873,7 +8873,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.end:
 // CHECK15-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK15:       omp.loop.exit:
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK15-NEXT:    ret void
 //
 //
@@ -9018,7 +9018,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.end:
 // CHECK15-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK15:       omp.loop.exit:
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK15-NEXT:    ret void
 //
 //
@@ -9197,7 +9197,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK15-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK15:       omp.dispatch.end:
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK15-NEXT:    ret void
 //
 //
@@ -10169,7 +10169,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.loop.exit:
 // CHECK17-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK17-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK17:       omp.precond.end:
 // CHECK17-NEXT:    ret void
@@ -10386,7 +10386,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.loop.exit:
 // CHECK17-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK17-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK17:       omp.precond.end:
 // CHECK17-NEXT:    ret void
@@ -10642,7 +10642,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.loop.exit:
 // CHECK17-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK17-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK17:       omp.precond.end:
 // CHECK17-NEXT:    ret void
@@ -11502,7 +11502,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -11652,7 +11652,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -11838,7 +11838,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK17-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK17:       omp.dispatch.end:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -12819,7 +12819,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.loop.exit:
 // CHECK19-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK19-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK19:       omp.precond.end:
 // CHECK19-NEXT:    ret void
@@ -13031,7 +13031,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.loop.exit:
 // CHECK19-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK19-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK19:       omp.precond.end:
 // CHECK19-NEXT:    ret void
@@ -13282,7 +13282,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.loop.exit:
 // CHECK19-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK19-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK19:       omp.precond.end:
 // CHECK19-NEXT:    ret void
@@ -14127,7 +14127,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.end:
 // CHECK19-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK19:       omp.loop.exit:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK19-NEXT:    ret void
 //
 //
@@ -14272,7 +14272,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.end:
 // CHECK19-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK19:       omp.loop.exit:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK19-NEXT:    ret void
 //
 //
@@ -14451,7 +14451,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK19-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK19:       omp.dispatch.end:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK19-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
index 5294f0d65eb6..109063c623a1 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -535,7 +535,7 @@ void test_target_teams_atomic() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP24]])
 // CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK1-NEXT:    br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -784,7 +784,7 @@ void test_target_teams_atomic() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP22]])
 // CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK1-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1006,7 +1006,7 @@ void test_target_teams_atomic() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1465,7 +1465,7 @@ void test_target_teams_atomic() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP24]])
 // CHECK3-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK3-NEXT:    br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1709,7 +1709,7 @@ void test_target_teams_atomic() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP22]])
 // CHECK3-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK3-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1927,7 +1927,7 @@ void test_target_teams_atomic() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK3-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2574,7 +2574,7 @@ void test_target_teams_atomic() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP24]])
 // CHECK9-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK9-NEXT:    br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2825,7 +2825,7 @@ void test_target_teams_atomic() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP22]])
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK9-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2995,7 +2995,7 @@ void test_target_teams_atomic() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK9-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK9-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3253,7 +3253,7 @@ void test_target_teams_atomic() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP24]])
 // CHECK11-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK11-NEXT:    br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3499,7 +3499,7 @@ void test_target_teams_atomic() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3665,7 +3665,7 @@ void test_target_teams_atomic() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK11-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK11-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
index 2b4d31d14524..306ec1db2ab2 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
@@ -339,7 +339,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK1-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -585,7 +585,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK3-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1190,7 +1190,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]])
 // CHECK9-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK9-NEXT:    br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1440,7 +1440,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK9-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1906,7 +1906,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]])
 // CHECK11-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK11-NEXT:    br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2150,7 +2150,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK11-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
index 90fa290370b4..c265c1c616b1 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
@@ -449,7 +449,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -614,7 +614,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -799,7 +799,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1120,7 +1120,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK3-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1280,7 +1280,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK3-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1460,7 +1460,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK3-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2224,7 +2224,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK9-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2465,7 +2465,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK9-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2745,7 +2745,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK9-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3078,7 +3078,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK9-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3242,7 +3242,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK9-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3441,7 +3441,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK9-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3955,7 +3955,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK11-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4191,7 +4191,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK11-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4466,7 +4466,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK11-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4794,7 +4794,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK11-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4953,7 +4953,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK11-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5147,7 +5147,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK11-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
index bf79fe669d83..37c1f428ef9a 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -708,7 +708,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1189,7 +1189,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1792,7 +1792,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2267,7 +2267,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2692,7 +2692,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK5-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3798,7 +3798,7 @@ int main() {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK13-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4119,7 +4119,7 @@ int main() {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK13-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4540,7 +4540,7 @@ int main() {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK15-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK15-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK15-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4855,7 +4855,7 @@ int main() {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK15-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK15-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK15-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5175,7 +5175,7 @@ int main() {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK17-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK17-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
index 329cd788c8ba..df5dd7ba6805 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -353,7 +353,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -510,7 +510,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -778,7 +778,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -935,7 +935,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1111,7 +1111,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1368,7 +1368,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1525,7 +1525,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1701,7 +1701,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1958,7 +1958,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2115,7 +2115,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2383,7 +2383,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2540,7 +2540,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2819,7 +2819,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK3-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK3-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2946,7 +2946,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK3-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK3-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3203,7 +3203,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3360,7 +3360,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3536,7 +3536,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4386,7 +4386,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4543,7 +4543,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4811,7 +4811,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4968,7 +4968,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5144,7 +5144,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5401,7 +5401,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5558,7 +5558,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5734,7 +5734,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5991,7 +5991,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6148,7 +6148,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6416,7 +6416,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6573,7 +6573,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6852,7 +6852,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6979,7 +6979,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7236,7 +7236,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7393,7 +7393,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7569,7 +7569,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
index 903ecb865a25..3fdc0c482541 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -435,7 +435,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
 // CHECK1-NEXT:    br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -713,7 +713,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP6]])
 // CHECK3-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
 // CHECK3-NEXT:    br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1179,7 +1179,7 @@ int main() {
 // CHECK5:       omp.loop.exit:
 // CHECK5-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK5-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
 // CHECK5-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK5-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1677,7 +1677,7 @@ int main() {
 // CHECK5:       omp.loop.exit:
 // CHECK5-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK5-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
 // CHECK5-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK5-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2201,7 +2201,7 @@ int main() {
 // CHECK7:       omp.loop.exit:
 // CHECK7-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK7-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
 // CHECK7-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK7-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK7-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2693,7 +2693,7 @@ int main() {
 // CHECK7:       omp.loop.exit:
 // CHECK7-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK7-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
 // CHECK7-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK7-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK7-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
index 7a211b4cd06b..bba97a750adc 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
@@ -541,7 +541,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
 // CHECK1-NEXT:    br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -863,7 +863,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
 // CHECK1-NEXT:    br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1296,7 +1296,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
 // CHECK3-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
 // CHECK3-NEXT:    br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1612,7 +1612,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
 // CHECK3-NEXT:    br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1980,7 +1980,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK5-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3100,7 +3100,7 @@ int main() {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
 // CHECK13-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
 // CHECK13-NEXT:    br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3351,7 +3351,7 @@ int main() {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK13-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
 // CHECK13-NEXT:    br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3636,7 +3636,7 @@ int main() {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
 // CHECK15-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK15-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
 // CHECK15-NEXT:    br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3881,7 +3881,7 @@ int main() {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK15-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK15-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
 // CHECK15-NEXT:    br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4125,7 +4125,7 @@ int main() {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK17-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK17-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
index 31941b48f92d..c347743bd4fd 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
@@ -268,7 +268,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -420,7 +420,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -613,7 +613,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
index f255c3f084db..52430d51cde1 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
@@ -323,7 +323,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
 // CHECK1-NEXT:    br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -627,7 +627,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
 // CHECK1-NEXT:    br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -926,7 +926,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
 // CHECK3-NEXT:    br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1226,7 +1226,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
 // CHECK3-NEXT:    br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1488,7 +1488,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
 // CHECK5-NEXT:    br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
index b41571eb415d..f7c0666d58b6 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
@@ -603,7 +603,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -768,7 +768,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -954,7 +954,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK1:       omp.dispatch.end:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK1-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1695,7 +1695,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK3-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1855,7 +1855,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK3-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2034,7 +2034,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK3:       omp.dispatch.end:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK3-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2770,7 +2770,7 @@ int main (int argc, char **argv) {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK5-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2935,7 +2935,7 @@ int main (int argc, char **argv) {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK5-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3121,7 +3121,7 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK5-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK5:       omp.dispatch.end:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK5-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3862,7 +3862,7 @@ int main (int argc, char **argv) {
 // CHECK7:       omp.inner.for.end:
 // CHECK7-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK7:       omp.loop.exit:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK7-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK7-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK7-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4022,7 +4022,7 @@ int main (int argc, char **argv) {
 // CHECK7:       omp.inner.for.end:
 // CHECK7-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK7:       omp.loop.exit:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK7-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK7-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK7-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4201,7 +4201,7 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK7-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK7:       omp.dispatch.end:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK7-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK7-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK7-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5592,7 +5592,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK13-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5833,7 +5833,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK13-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6113,7 +6113,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK13-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7040,7 +7040,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK13-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK13-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7204,7 +7204,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK13-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK13-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7404,7 +7404,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK13-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK13:       omp.dispatch.end:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK13-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -8432,7 +8432,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK15-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK15-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK15-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -8668,7 +8668,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK15-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK15-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK15-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -8943,7 +8943,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK15-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK15-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK15-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -9855,7 +9855,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.end:
 // CHECK15-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK15:       omp.loop.exit:
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK15-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK15-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK15-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -10014,7 +10014,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.end:
 // CHECK15-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK15:       omp.loop.exit:
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK15-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK15-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK15-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -10207,7 +10207,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK15-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK15:       omp.dispatch.end:
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK15-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK15-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK15-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -11226,7 +11226,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.loop.exit:
 // CHECK17-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK17-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -11467,7 +11467,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.loop.exit:
 // CHECK17-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK17-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -11747,7 +11747,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.loop.exit:
 // CHECK17-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK17-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -12674,7 +12674,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK17-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK17-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -12838,7 +12838,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK17-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK17-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -13038,7 +13038,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK17-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK17:       omp.dispatch.end:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK17-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -14066,7 +14066,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.loop.exit:
 // CHECK19-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK19-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK19-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -14302,7 +14302,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.loop.exit:
 // CHECK19-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK19-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK19-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -14577,7 +14577,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.loop.exit:
 // CHECK19-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK19-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK19-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -15489,7 +15489,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.end:
 // CHECK19-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK19:       omp.loop.exit:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK19-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK19-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -15648,7 +15648,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.end:
 // CHECK19-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK19:       omp.loop.exit:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK19-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK19-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -15841,7 +15841,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK19-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK19:       omp.dispatch.end:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK19-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK19-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp
index 26c9e4713f3e..190ea17e9607 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp
@@ -223,7 +223,7 @@ int target_teams_fun(int *g){
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -280,7 +280,7 @@ int target_teams_fun(int *g){
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK1-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
@@ -437,7 +437,7 @@ int target_teams_fun(int *g){
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -848,7 +848,7 @@ int target_teams_fun(int *g){
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    ret void
@@ -905,7 +905,7 @@ int target_teams_fun(int *g){
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK2-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
@@ -1062,7 +1062,7 @@ int target_teams_fun(int *g){
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    ret void
@@ -1471,7 +1471,7 @@ int target_teams_fun(int *g){
 // CHECK4:       omp.loop.exit:
 // CHECK4-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK4-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK4-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]])
+// CHECK4-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // CHECK4-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK4:       omp.precond.end:
 // CHECK4-NEXT:    ret void
@@ -1526,7 +1526,7 @@ int target_teams_fun(int *g){
 // CHECK4-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK4-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK4-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK4-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK4-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK4-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK4-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK4-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
@@ -1680,7 +1680,7 @@ int target_teams_fun(int *g){
 // CHECK4:       omp.loop.exit:
 // CHECK4-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK4-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK4-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK4-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
 // CHECK4-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK4:       omp.precond.end:
 // CHECK4-NEXT:    ret void
@@ -1900,7 +1900,7 @@ int target_teams_fun(int *g){
 // CHECK10:       omp.loop.exit:
 // CHECK10-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK10-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]])
 // CHECK10-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK10:       omp.precond.end:
 // CHECK10-NEXT:    ret void
@@ -1957,7 +1957,7 @@ int target_teams_fun(int *g){
 // CHECK10-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK10-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK10-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK10-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK10-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK10-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK10-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK10-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
@@ -2116,7 +2116,7 @@ int target_teams_fun(int *g){
 // CHECK10:       omp.loop.exit:
 // CHECK10-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK10-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
 // CHECK10-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK10:       omp.precond.end:
 // CHECK10-NEXT:    ret void
@@ -2337,7 +2337,7 @@ int target_teams_fun(int *g){
 // CHECK12:       omp.loop.exit:
 // CHECK12-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK12-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK12-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]])
+// CHECK12-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // CHECK12-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK12:       omp.precond.end:
 // CHECK12-NEXT:    ret void
@@ -2392,7 +2392,7 @@ int target_teams_fun(int *g){
 // CHECK12-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK12-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK12-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK12-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK12-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK12-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK12-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK12-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
@@ -2548,7 +2548,7 @@ int target_teams_fun(int *g){
 // CHECK12:       omp.loop.exit:
 // CHECK12-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK12-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK12-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK12-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
 // CHECK12-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK12:       omp.precond.end:
 // CHECK12-NEXT:    ret void
diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp
index 3b1af7618794..22cf534bf0ba 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp
@@ -1312,7 +1312,7 @@ int foo() {
 // IR-GPU:       omp.loop.exit:
 // IR-GPU-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
 // IR-GPU-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
-// IR-GPU-NEXT:    call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP33]])
+// IR-GPU-NEXT:    call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP33]])
 // IR-GPU-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
 // IR-GPU-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
 // IR-GPU-NEXT:    br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -1418,7 +1418,7 @@ int foo() {
 // IR-GPU:       omp.arrayinit.done:
 // IR-GPU-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
 // IR-GPU-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// IR-GPU-NEXT:    call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// IR-GPU-NEXT:    call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
 // IR-GPU-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
 // IR-GPU-NEXT:    store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
 // IR-GPU-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
@@ -2001,7 +2001,7 @@ int foo() {
 // IR:       omp.loop.exit:
 // IR-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // IR-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// IR-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]])
+// IR-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]])
 // IR-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // IR-NEXT:    [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
 // IR-NEXT:    br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -2109,7 +2109,7 @@ int foo() {
 // IR:       omp.arrayinit.done:
 // IR-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // IR-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// IR-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // IR-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // IR-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99
 // IR-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -2398,7 +2398,7 @@ int foo() {
 // IR-PCH:       omp.loop.exit:
 // IR-PCH-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // IR-PCH-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// IR-PCH-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]])
+// IR-PCH-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]])
 // IR-PCH-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // IR-PCH-NEXT:    [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
 // IR-PCH-NEXT:    br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -2506,7 +2506,7 @@ int foo() {
 // IR-PCH:       omp.arrayinit.done:
 // IR-PCH-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // IR-PCH-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// IR-PCH-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // IR-PCH-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // IR-PCH-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99
 // IR-PCH-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp
index 6a30ef7f6eb8..82ad43373327 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp
@@ -239,7 +239,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -278,7 +278,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087
 // CHECK1-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -473,7 +473,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -510,7 +510,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087
 // CHECK3-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -846,7 +846,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -926,7 +926,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// CHECK9-NEXT:    call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
 // CHECK9-NEXT:    [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
 // CHECK9-NEXT:    [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]]
@@ -1133,7 +1133,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK9-NEXT:    ret void
 //
 //
@@ -1510,7 +1510,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP30]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -1592,7 +1592,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// CHECK11-NEXT:    call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
 // CHECK11-NEXT:    [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
 // CHECK11-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
 // CHECK11-NEXT:    [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]]
@@ -1795,7 +1795,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK11-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
index c19323c35b4e..b2ff4c20db7a 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
@@ -210,7 +210,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -244,7 +244,7 @@ int main() {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
 // CHECK1-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -347,7 +347,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -601,7 +601,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -744,7 +744,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -906,7 +906,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1132,7 +1132,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1275,7 +1275,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1413,7 +1413,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp
index 195989692dc3..85f6a85a11bd 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp
@@ -125,7 +125,7 @@ void gtid_test() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -159,7 +159,7 @@ void gtid_test() {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
 // CHECK1-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp
index 987c12adc6f6..7503b69b92aa 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp
@@ -420,7 +420,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
 // CHECK1-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK1-NEXT:    [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2
@@ -481,7 +481,7 @@ int main() {
 // CHECK1-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
 // CHECK1-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -728,7 +728,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
 // CHECK1-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK1-NEXT:    [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2
@@ -1151,7 +1151,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]])
 // CHECK3-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2
@@ -1210,7 +1210,7 @@ int main() {
 // CHECK3-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
 // CHECK3-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
 // CHECK3-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -1453,7 +1453,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]])
 // CHECK3-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2
@@ -1827,7 +1827,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK5-NEXT:    ret void
 //
 //
@@ -1869,7 +1869,7 @@ int main() {
 // CHECK5-NEXT:    store ptr [[G1]], ptr [[_TMP3]], align 8
 // CHECK5-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK5-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK5-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK5-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK5-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK5-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
 // CHECK5-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -2015,7 +2015,7 @@ int main() {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
 // CHECK13-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]]
 // CHECK13-NEXT:    [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2
@@ -2086,7 +2086,7 @@ int main() {
 // CHECK13-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]]
 // CHECK13-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK13-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK13-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK13-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
 // CHECK13-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -2252,7 +2252,7 @@ int main() {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
 // CHECK13-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]]
 // CHECK13-NEXT:    [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2
@@ -2527,7 +2527,7 @@ int main() {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]])
 // CHECK15-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]]
 // CHECK15-NEXT:    [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2
@@ -2596,7 +2596,7 @@ int main() {
 // CHECK15-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]]
 // CHECK15-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK15-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK15-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK15-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
 // CHECK15-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -2758,7 +2758,7 @@ int main() {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]])
 // CHECK15-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]]
 // CHECK15-NEXT:    [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2
@@ -3018,7 +3018,7 @@ int main() {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -3060,7 +3060,7 @@ int main() {
 // CHECK17-NEXT:    store ptr [[G1]], ptr [[_TMP3]], align 8
 // CHECK17-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK17-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK17-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK17-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK17-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
 // CHECK17-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
diff --git a/clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp
index bfa00ee7a0f4..1036ffd195de 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp
@@ -316,7 +316,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK1-NEXT:    store ptr [[SIVAR2]], ptr [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -606,7 +606,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK1-NEXT:    store ptr [[T_VAR2]], ptr [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -891,7 +891,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[SIVAR1]], ptr [[TMP14]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -1177,7 +1177,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[T_VAR1]], ptr [[TMP14]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -1425,7 +1425,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK5-NEXT:    store ptr [[SIVAR2]], ptr [[TMP15]], align 8
 // CHECK5-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
diff --git a/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp
index f945dc9b21d4..0dc2c95641e2 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp
@@ -400,7 +400,7 @@ void foo() {
 // CHECK:       omp.inner.for.end:
 // CHECK-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK:       omp.loop.exit:
-// CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP1]])
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
 // CHECK-NEXT:    ret void
 //
 //
@@ -434,7 +434,7 @@ void foo() {
 // CHECK-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
 // CHECK-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
index 6dcfa4f6f2ab..a13b565cb38d 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
@@ -562,12 +562,12 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       cancel.exit:
 // CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP27]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    br label [[CANCEL_CONT]]
@@ -771,7 +771,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -1159,12 +1159,12 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       cancel.exit:
 // CHECK3-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP27]])
 // CHECK3-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    br label [[CANCEL_CONT]]
@@ -1363,7 +1363,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
@@ -1674,7 +1674,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -1980,7 +1980,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -2200,7 +2200,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -2413,7 +2413,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.end:
 // CHECK19-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK19:       omp.loop.exit:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK19-NEXT:    ret void
 //
 //
@@ -2730,7 +2730,7 @@ int main (int argc, char **argv) {
 // CHECK25:       omp.loop.exit:
 // CHECK25-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK25-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK25-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK25-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK25-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK25:       omp.precond.end:
 // CHECK25-NEXT:    ret void
@@ -2973,7 +2973,7 @@ int main (int argc, char **argv) {
 // CHECK25:       omp.inner.for.end:
 // CHECK25-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK25:       omp.loop.exit:
-// CHECK25-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK25-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK25-NEXT:    ret void
 //
 //
@@ -3285,7 +3285,7 @@ int main (int argc, char **argv) {
 // CHECK27:       omp.loop.exit:
 // CHECK27-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK27-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK27-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK27-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK27-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK27:       omp.precond.end:
 // CHECK27-NEXT:    ret void
@@ -3523,6 +3523,6 @@ int main (int argc, char **argv) {
 // CHECK27:       omp.inner.for.end:
 // CHECK27-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK27:       omp.loop.exit:
-// CHECK27-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK27-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK27-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
index 60c54f7bc0b4..64ee660b706c 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
@@ -334,7 +334,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -564,7 +564,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -991,7 +991,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -1215,7 +1215,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    ret void
 //
 //
@@ -1643,7 +1643,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -1861,6 +1861,6 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp
index 3455597f9090..37d19f86a950 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp
@@ -314,7 +314,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -537,7 +537,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -764,7 +764,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -982,7 +982,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1168,7 +1168,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK9-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
index f324d2c7ac90..eb3d2fc84568 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
@@ -451,7 +451,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -602,7 +602,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -773,7 +773,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1080,7 +1080,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1226,7 +1226,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1392,7 +1392,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1881,7 +1881,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -2092,7 +2092,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -2345,7 +2345,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -2656,7 +2656,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    ret void
 //
 //
@@ -2806,7 +2806,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    ret void
 //
 //
@@ -2994,7 +2994,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    ret void
 //
 //
@@ -3480,7 +3480,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -3686,7 +3686,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -3934,7 +3934,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -4240,7 +4240,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    ret void
 //
 //
@@ -4385,7 +4385,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    ret void
 //
 //
@@ -4568,6 +4568,6 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
index 24c3bc4adfef..904e6a1c1ce7 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
@@ -679,7 +679,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK1-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]]
 // CHECK1-NEXT:    [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2
@@ -1148,7 +1148,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK1-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR2]]
 // CHECK1-NEXT:    [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2
@@ -1737,7 +1737,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK3-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2
@@ -2200,7 +2200,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK3-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2
@@ -2611,7 +2611,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
index 31367697db23..49ff6a5d08be 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
@@ -322,7 +322,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -465,7 +465,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -742,7 +742,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -885,7 +885,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1052,7 +1052,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1327,7 +1327,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1470,7 +1470,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1637,6 +1637,6 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
index 45197d5e296d..7b8df7367b82 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
@@ -411,7 +411,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]])
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -673,7 +673,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]])
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -1115,7 +1115,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK9-NEXT:    br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -1595,7 +1595,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK9-NEXT:    br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -2095,7 +2095,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -2569,7 +2569,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK11-NEXT:    br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
index c805d739cf9c..a7b1f6eb309e 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
@@ -371,7 +371,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 // CHECK1:       terminate.lpad:
 // CHECK1-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -538,7 +538,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 // CHECK1:       terminate.lpad:
 // CHECK1-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -887,7 +887,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 // CHECK1:       terminate.lpad:
 // CHECK1-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -1034,7 +1034,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 // CHECK1:       terminate.lpad:
 // CHECK1-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -1181,7 +1181,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 // CHECK1:       terminate.lpad:
 // CHECK1-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -1350,7 +1350,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 // CHECK1:       terminate.lpad:
 // CHECK1-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -1658,7 +1658,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 // CHECK5:       terminate.lpad:
 // CHECK5-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -1825,7 +1825,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 // CHECK5:       terminate.lpad:
 // CHECK5-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -2165,7 +2165,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 // CHECK5:       terminate.lpad:
 // CHECK5-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -2312,7 +2312,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 // CHECK5:       terminate.lpad:
 // CHECK5-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -2459,7 +2459,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 // CHECK5:       terminate.lpad:
 // CHECK5-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
@@ -2628,7 +2628,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    ret void
 // CHECK5:       terminate.lpad:
 // CHECK5-NEXT:    [[TMP11:%.*]] = landingpad { ptr, i32 }
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp
index fe537dc743d4..dc430fc55787 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp
@@ -496,7 +496,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
 // CHECK1-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK1-NEXT:    [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2
@@ -804,7 +804,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK1-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK1-NEXT:    [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2
@@ -1223,7 +1223,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
 // CHECK3-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2
@@ -1525,7 +1525,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK3-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
@@ -1883,7 +1883,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp
index cbd426aabb9c..386e772b7897 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp
@@ -263,7 +263,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -401,7 +401,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -580,6 +580,6 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
index a003b9f203a4..7ac42579e91b 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
@@ -322,7 +322,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK1-NEXT:    store ptr [[SIVAR2]], ptr [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -615,7 +615,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK1-NEXT:    store ptr [[T_VAR2]], ptr [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -903,7 +903,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[SIVAR1]], ptr [[TMP14]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -1192,7 +1192,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[T_VAR1]], ptr [[TMP14]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -1439,7 +1439,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK9-NEXT:    store ptr [[SIVAR2]], ptr [[TMP15]], align 8
 // CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
index b583fcad2d37..f58c848f4cae 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -248,8 +248,8 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4
 // CHECK1-NEXT:    [[TMP73:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP72]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l16.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
 // CHECK1-NEXT:    switch i32 [[TMP73]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK1-NEXT:      i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK1-NEXT:      i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // CHECK1-NEXT:    ]
 // CHECK1:       .omp.reduction.case1:
 // CHECK1-NEXT:    [[TMP74:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -597,7 +597,7 @@ int main(int argc, char **argv) {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP79]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP79]])
 // CHECK1-NEXT:    [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4
 // CHECK1-NEXT:    call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP81]], i32 1)
@@ -612,8 +612,8 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4
 // CHECK1-NEXT:    [[TMP88:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP87]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l16.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
 // CHECK1-NEXT:    switch i32 [[TMP88]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK1-NEXT:      i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK1-NEXT:      i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // CHECK1-NEXT:    ]
 // CHECK1:       .omp.reduction.case1:
 // CHECK1-NEXT:    [[TMP89:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -805,21 +805,21 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
-// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META12:![0-9]+]]
+// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META12]]
 // CHECK1-NEXT:    call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]]
-// CHECK1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12
+// CHECK1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META12]]
 // CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8
-// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META12]]
 // CHECK1-NEXT:    [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]])
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
@@ -839,7 +839,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]]
 // CHECK1-NEXT:    [[TMP31:%.*]] = add nuw i64 [[TMP30]], 1
 // CHECK1-NEXT:    [[TMP32:%.*]] = mul nuw i64 [[TMP31]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT:    store i64 [[TMP31]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12
+// CHECK1-NEXT:    store i64 [[TMP31]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias [[META12]]
 // CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP33]], ptr [[TMP20]])
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
@@ -849,8 +849,8 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP39:%.*]] = ptrtoint ptr [[TMP20]] to i64
 // CHECK1-NEXT:    [[TMP40:%.*]] = sub i64 [[TMP38]], [[TMP39]]
 // CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr i8, ptr [[TMP34]], i64 [[TMP40]]
-// CHECK1-NEXT:    store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12
-// CHECK1-NEXT:    store ptr [[TMP41]], ptr [[TMP4_I]], align 8, !noalias !12
+// CHECK1-NEXT:    store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias [[META12]]
+// CHECK1-NEXT:    store ptr [[TMP41]], ptr [[TMP4_I]], align 8, !noalias [[META12]]
 // CHECK1-NEXT:    ret i32 0
 //
 //
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
index ab0e08259efe..7518179f4776 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
@@ -610,7 +610,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -761,7 +761,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -933,7 +933,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK1:       omp.dispatch.end:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1632,7 +1632,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1778,7 +1778,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1943,7 +1943,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK3:       omp.dispatch.end:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -2637,7 +2637,7 @@ int main (int argc, char **argv) {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    ret void
 //
 //
@@ -2788,7 +2788,7 @@ int main (int argc, char **argv) {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    ret void
 //
 //
@@ -2960,7 +2960,7 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK5-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK5:       omp.dispatch.end:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    ret void
 //
 //
@@ -3659,7 +3659,7 @@ int main (int argc, char **argv) {
 // CHECK7:       omp.inner.for.end:
 // CHECK7-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK7:       omp.loop.exit:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK7-NEXT:    ret void
 //
 //
@@ -3805,7 +3805,7 @@ int main (int argc, char **argv) {
 // CHECK7:       omp.inner.for.end:
 // CHECK7-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK7:       omp.loop.exit:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK7-NEXT:    ret void
 //
 //
@@ -3970,7 +3970,7 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK7-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK7:       omp.dispatch.end:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK7-NEXT:    ret void
 //
 //
@@ -4917,7 +4917,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK13-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK13:       omp.precond.end:
 // CHECK13-NEXT:    ret void
@@ -5128,7 +5128,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK13-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK13:       omp.precond.end:
 // CHECK13-NEXT:    ret void
@@ -5381,7 +5381,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK13-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK13:       omp.precond.end:
 // CHECK13-NEXT:    ret void
@@ -6226,7 +6226,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK13-NEXT:    ret void
 //
 //
@@ -6376,7 +6376,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK13-NEXT:    ret void
 //
 //
@@ -6565,7 +6565,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK13-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK13:       omp.dispatch.end:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK13-NEXT:    ret void
 //
 //
@@ -7537,7 +7537,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK15-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK15:       omp.precond.end:
 // CHECK15-NEXT:    ret void
@@ -7743,7 +7743,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK15-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK15:       omp.precond.end:
 // CHECK15-NEXT:    ret void
@@ -7991,7 +7991,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.loop.exit:
 // CHECK15-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK15-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK15-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK15:       omp.precond.end:
 // CHECK15-NEXT:    ret void
@@ -8821,7 +8821,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.end:
 // CHECK15-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK15:       omp.loop.exit:
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK15-NEXT:    ret void
 //
 //
@@ -8966,7 +8966,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.end:
 // CHECK15-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK15:       omp.loop.exit:
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK15-NEXT:    ret void
 //
 //
@@ -9148,7 +9148,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK15-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK15:       omp.dispatch.end:
-// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK15-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK15-NEXT:    ret void
 //
 //
@@ -10111,7 +10111,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.loop.exit:
 // CHECK17-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK17-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK17:       omp.precond.end:
 // CHECK17-NEXT:    ret void
@@ -10322,7 +10322,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.loop.exit:
 // CHECK17-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK17-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK17:       omp.precond.end:
 // CHECK17-NEXT:    ret void
@@ -10575,7 +10575,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.loop.exit:
 // CHECK17-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK17-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK17:       omp.precond.end:
 // CHECK17-NEXT:    ret void
@@ -11420,7 +11420,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -11570,7 +11570,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -11759,7 +11759,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK17-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK17:       omp.dispatch.end:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -12731,7 +12731,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.loop.exit:
 // CHECK19-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK19-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK19:       omp.precond.end:
 // CHECK19-NEXT:    ret void
@@ -12937,7 +12937,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.loop.exit:
 // CHECK19-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK19-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK19:       omp.precond.end:
 // CHECK19-NEXT:    ret void
@@ -13185,7 +13185,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.loop.exit:
 // CHECK19-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK19-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK19:       omp.precond.end:
 // CHECK19-NEXT:    ret void
@@ -14015,7 +14015,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.end:
 // CHECK19-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK19:       omp.loop.exit:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK19-NEXT:    ret void
 //
 //
@@ -14160,7 +14160,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.end:
 // CHECK19-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK19:       omp.loop.exit:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK19-NEXT:    ret void
 //
 //
@@ -14342,7 +14342,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK19-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK19:       omp.dispatch.end:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK19-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
index 96439c053ea8..854afe3b9bbc 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
@@ -583,7 +583,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -828,7 +828,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
 // CHECK1-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
 // CHECK1-NEXT:    br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1249,7 +1249,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1489,7 +1489,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
 // CHECK3-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
 // CHECK3-NEXT:    br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2126,7 +2126,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP26]])
 // CHECK9-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
 // CHECK9-NEXT:    br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2490,7 +2490,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP26]])
 // CHECK11-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
 // CHECK11-NEXT:    br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2929,7 +2929,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP6]])
 // CHECK17-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
 // CHECK17-NEXT:    br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3192,7 +3192,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.end:
 // CHECK19-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK19:       omp.loop.exit:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP6]])
 // CHECK19-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
 // CHECK19-NEXT:    br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3687,7 +3687,7 @@ int main (int argc, char **argv) {
 // CHECK25:       omp.loop.exit:
 // CHECK25-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK25-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK25-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// CHECK25-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP26]])
 // CHECK25-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK25-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
 // CHECK25-NEXT:    br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3955,7 +3955,7 @@ int main (int argc, char **argv) {
 // CHECK25:       omp.inner.for.end:
 // CHECK25-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK25:       omp.loop.exit:
-// CHECK25-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK25-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK25-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK25-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK25-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4314,7 +4314,7 @@ int main (int argc, char **argv) {
 // CHECK27:       omp.loop.exit:
 // CHECK27-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK27-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK27-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// CHECK27-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP26]])
 // CHECK27-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK27-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
 // CHECK27-NEXT:    br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4577,7 +4577,7 @@ int main (int argc, char **argv) {
 // CHECK27:       omp.inner.for.end:
 // CHECK27-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK27:       omp.loop.exit:
-// CHECK27-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK27-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
 // CHECK27-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK27-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK27-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
index e68bd591519b..e7b9978dd43c 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
@@ -347,7 +347,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK1-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -593,7 +593,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK3-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1186,7 +1186,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK9-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
 // CHECK9-NEXT:    br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1436,7 +1436,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK9-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1890,7 +1890,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
 // CHECK11-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
 // CHECK11-NEXT:    br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2134,7 +2134,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK11-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
index c2226bc9e75d..d3777e81047c 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
@@ -461,7 +461,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -626,7 +626,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -811,7 +811,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1132,7 +1132,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK3-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1292,7 +1292,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK3-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1472,7 +1472,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK3-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2227,7 +2227,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK9-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2462,7 +2462,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK9-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2739,7 +2739,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK9-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3069,7 +3069,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK9-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3233,7 +3233,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK9-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3435,7 +3435,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK9-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3940,7 +3940,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4170,7 +4170,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4442,7 +4442,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4767,7 +4767,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK11-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4926,7 +4926,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK11-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5123,7 +5123,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK11-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK11-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
index 4c9299e6cb9d..4f87d40e58f6 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -689,7 +689,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1172,7 +1172,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1775,7 +1775,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2252,7 +2252,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3323,7 +3323,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK9-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
index 275058b195e3..7c86b180ec67 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -326,7 +326,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -483,7 +483,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -774,7 +774,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -931,7 +931,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1112,7 +1112,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1401,7 +1401,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1558,7 +1558,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1739,7 +1739,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1966,7 +1966,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2123,7 +2123,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2414,7 +2414,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2571,7 +2571,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2855,7 +2855,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK3-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK3-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2982,7 +2982,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK3-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK3-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3271,7 +3271,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3428,7 +3428,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3609,7 +3609,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4427,7 +4427,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4584,7 +4584,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4875,7 +4875,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5032,7 +5032,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5213,7 +5213,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5502,7 +5502,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5659,7 +5659,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5840,7 +5840,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6067,7 +6067,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6224,7 +6224,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6515,7 +6515,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6672,7 +6672,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6956,7 +6956,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7083,7 +7083,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7372,7 +7372,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7529,7 +7529,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7710,7 +7710,7 @@ int main() {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK11-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
index fe2001842c26..268298c14801 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -427,7 +427,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]])
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -703,7 +703,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]])
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1195,7 +1195,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK9-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1689,7 +1689,7 @@ int main() {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK9-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2203,7 +2203,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK11-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2691,7 +2691,7 @@ int main() {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK11-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp
index 49f57a000d3f..fa8bcf65b8fb 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp
@@ -380,7 +380,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -561,7 +561,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -924,7 +924,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1085,7 +1085,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1246,7 +1246,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1429,7 +1429,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2100,7 +2100,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK5-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2281,7 +2281,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK5-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2635,7 +2635,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK5-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2796,7 +2796,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK5-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2957,7 +2957,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK5-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3140,7 +3140,7 @@ int main() {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK5-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp
index 7721daf4aa32..03bee1dbb63c 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp
@@ -505,7 +505,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
 // CHECK1-NEXT:    br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -827,7 +827,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
 // CHECK1-NEXT:    br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1260,7 +1260,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
 // CHECK3-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
 // CHECK3-NEXT:    br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1576,7 +1576,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
 // CHECK3-NEXT:    br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2700,7 +2700,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK9-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK9-NEXT:    br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
index 2a3abf176929..7d35ea305f92 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
@@ -272,7 +272,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -424,7 +424,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -617,7 +617,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
 // CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
index 8745dd9710f6..2dd1db48f030 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
@@ -333,7 +333,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
 // CHECK1-NEXT:    br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -640,7 +640,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
 // CHECK1-NEXT:    br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -942,7 +942,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
 // CHECK3-NEXT:    br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1245,7 +1245,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
 // CHECK3-NEXT:    br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1704,7 +1704,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK9-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
 // CHECK9-NEXT:    br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
index 0c0945a23d48..ff70e71e0126 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
@@ -627,7 +627,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -792,7 +792,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -978,7 +978,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK1:       omp.dispatch.end:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK1-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1724,7 +1724,7 @@ int main (int argc, char **argv) {
 // CHECK2:       omp.inner.for.end:
 // CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK2:       omp.loop.exit:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK2-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -1889,7 +1889,7 @@ int main (int argc, char **argv) {
 // CHECK2:       omp.inner.for.end:
 // CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK2:       omp.loop.exit:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK2-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2075,7 +2075,7 @@ int main (int argc, char **argv) {
 // CHECK2-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK2:       omp.dispatch.end:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK2-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2816,7 +2816,7 @@ int main (int argc, char **argv) {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK5-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -2976,7 +2976,7 @@ int main (int argc, char **argv) {
 // CHECK5:       omp.inner.for.end:
 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK5:       omp.loop.exit:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK5-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3155,7 +3155,7 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK5-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK5:       omp.dispatch.end:
-// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK5-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK5-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK5-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK5-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -3886,7 +3886,7 @@ int main (int argc, char **argv) {
 // CHECK6:       omp.inner.for.end:
 // CHECK6-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK6:       omp.loop.exit:
-// CHECK6-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK6-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK6-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK6-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK6-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4046,7 +4046,7 @@ int main (int argc, char **argv) {
 // CHECK6:       omp.inner.for.end:
 // CHECK6-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK6:       omp.loop.exit:
-// CHECK6-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK6-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK6-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK6-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK6-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -4225,7 +4225,7 @@ int main (int argc, char **argv) {
 // CHECK6-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK6-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK6:       omp.dispatch.end:
-// CHECK6-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK6-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK6-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK6-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
 // CHECK6-NEXT:    br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5604,7 +5604,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK13-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK13-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -5839,7 +5839,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK13-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK13-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -6116,7 +6116,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.loop.exit:
 // CHECK13-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK13-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK13-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7028,7 +7028,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK13-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK13-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7192,7 +7192,7 @@ int main (int argc, char **argv) {
 // CHECK13:       omp.inner.for.end:
 // CHECK13-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK13:       omp.loop.exit:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK13-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK13-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -7395,7 +7395,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK13-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK13:       omp.dispatch.end:
-// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK13-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK13-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK13-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -8415,7 +8415,7 @@ int main (int argc, char **argv) {
 // CHECK14:       omp.loop.exit:
 // CHECK14-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK14-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK14-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK14-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK14-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK14-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK14-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -8650,7 +8650,7 @@ int main (int argc, char **argv) {
 // CHECK14:       omp.loop.exit:
 // CHECK14-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK14-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK14-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK14-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK14-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK14-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK14-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -8927,7 +8927,7 @@ int main (int argc, char **argv) {
 // CHECK14:       omp.loop.exit:
 // CHECK14-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK14-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK14-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK14-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK14-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK14-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK14-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -9839,7 +9839,7 @@ int main (int argc, char **argv) {
 // CHECK14:       omp.inner.for.end:
 // CHECK14-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK14:       omp.loop.exit:
-// CHECK14-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK14-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK14-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK14-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK14-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -10003,7 +10003,7 @@ int main (int argc, char **argv) {
 // CHECK14:       omp.inner.for.end:
 // CHECK14-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK14:       omp.loop.exit:
-// CHECK14-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK14-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK14-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK14-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK14-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -10206,7 +10206,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4
 // CHECK14-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK14:       omp.dispatch.end:
-// CHECK14-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK14-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK14-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK14-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK14-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -11225,7 +11225,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.loop.exit:
 // CHECK17-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK17-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK17-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -11455,7 +11455,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.loop.exit:
 // CHECK17-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK17-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK17-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -11727,7 +11727,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.loop.exit:
 // CHECK17-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK17-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK17-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -12624,7 +12624,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK17-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK17-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -12783,7 +12783,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK17-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK17-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -12979,7 +12979,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK17-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK17:       omp.dispatch.end:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK17-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -13988,7 +13988,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.loop.exit:
 // CHECK19-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK19-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK19-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -14218,7 +14218,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.loop.exit:
 // CHECK19-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK19-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK19-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -14490,7 +14490,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.loop.exit:
 // CHECK19-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
 // CHECK19-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK19-NEXT:    br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -15387,7 +15387,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.end:
 // CHECK19-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK19:       omp.loop.exit:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK19-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK19-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -15546,7 +15546,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.end:
 // CHECK19-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK19:       omp.loop.exit:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
 // CHECK19-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK19-NEXT:    br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
@@ -15742,7 +15742,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
 // CHECK19-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK19:       omp.dispatch.end:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
 // CHECK19-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK19-NEXT:    br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
diff --git a/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp b/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp
index 041a28bd87e7..85a4dfea91a2 100644
--- a/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp
+++ b/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp
@@ -451,7 +451,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP22]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -509,7 +509,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK1-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
@@ -653,7 +653,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    ret void
@@ -1036,7 +1036,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
@@ -1092,7 +1092,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK3-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK3-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
@@ -1233,7 +1233,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    ret void
@@ -1538,7 +1538,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -1599,7 +1599,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK9-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK9-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
@@ -1847,7 +1847,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -1906,7 +1906,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK11-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
@@ -2091,7 +2091,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.end:
 // CHECK17-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK17:       omp.loop.exit:
-// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK17-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -2128,7 +2128,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK17-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK17-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK17-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK17-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK17-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK17-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122
 // CHECK17-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -2307,7 +2307,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.end:
 // CHECK19-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK19:       omp.loop.exit:
-// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK19-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK19-NEXT:    ret void
 //
 //
@@ -2342,7 +2342,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK19-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK19-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK19-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK19-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK19-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122
 // CHECK19-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -2594,7 +2594,7 @@ int main (int argc, char **argv) {
 // CHECK25:       omp.loop.exit:
 // CHECK25-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK25-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// CHECK25-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]])
+// CHECK25-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]])
 // CHECK25-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK25:       omp.precond.end:
 // CHECK25-NEXT:    ret void
@@ -2655,7 +2655,7 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK25-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK25-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK25-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK25-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK25-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK25-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK25-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
@@ -2865,7 +2865,7 @@ int main (int argc, char **argv) {
 // CHECK25:       omp.inner.for.end:
 // CHECK25-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK25:       omp.loop.exit:
-// CHECK25-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK25-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK25-NEXT:    ret void
 //
 //
@@ -3152,7 +3152,7 @@ int main (int argc, char **argv) {
 // CHECK27:       omp.loop.exit:
 // CHECK27-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK27-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK27-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]])
+// CHECK27-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // CHECK27-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK27:       omp.precond.end:
 // CHECK27-NEXT:    ret void
@@ -3211,7 +3211,7 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK27-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK27-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK27-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK27-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK27-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK27-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK27-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
@@ -3418,7 +3418,7 @@ int main (int argc, char **argv) {
 // CHECK27:       omp.inner.for.end:
 // CHECK27-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK27:       omp.loop.exit:
-// CHECK27-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK27-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK27-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/teams_generic_loop_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_codegen.cpp
index 2f3e70b1de58..2499fbb6811c 100644
--- a/clang/test/OpenMP/teams_generic_loop_codegen.cpp
+++ b/clang/test/OpenMP/teams_generic_loop_codegen.cpp
@@ -113,7 +113,7 @@ int foo() {
 // IR:       omp.loop.exit:
 // IR-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // IR-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// IR-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]])
+// IR-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
 // IR-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // IR-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
 // IR-NEXT:    br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -129,8 +129,8 @@ int foo() {
 // IR-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 // IR-NEXT:    [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
 // IR-NEXT:    switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// IR-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// IR-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// IR-NEXT:      i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// IR-NEXT:      i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // IR-NEXT:    ]
 // IR:       .omp.reduction.case1:
 // IR-NEXT:    [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
@@ -221,7 +221,7 @@ int foo() {
 // IR:       omp.arrayinit.done:
 // IR-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // IR-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// IR-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // IR-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // IR-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99
 // IR-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -285,8 +285,8 @@ int foo() {
 // IR-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
 // IR-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
 // IR-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// IR-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// IR-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// IR-NEXT:      i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// IR-NEXT:      i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // IR-NEXT:    ]
 // IR:       .omp.reduction.case1:
 // IR-NEXT:    [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
@@ -486,7 +486,7 @@ int foo() {
 // IR-PCH:       omp.loop.exit:
 // IR-PCH-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // IR-PCH-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// IR-PCH-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]])
+// IR-PCH-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]])
 // IR-PCH-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // IR-PCH-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
 // IR-PCH-NEXT:    br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -502,8 +502,8 @@ int foo() {
 // IR-PCH-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 // IR-PCH-NEXT:    [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
 // IR-PCH-NEXT:    switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// IR-PCH-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// IR-PCH-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// IR-PCH-NEXT:      i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// IR-PCH-NEXT:      i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // IR-PCH-NEXT:    ]
 // IR-PCH:       .omp.reduction.case1:
 // IR-PCH-NEXT:    [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
@@ -594,7 +594,7 @@ int foo() {
 // IR-PCH:       omp.arrayinit.done:
 // IR-PCH-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // IR-PCH-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// IR-PCH-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // IR-PCH-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // IR-PCH-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99
 // IR-PCH-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -658,8 +658,8 @@ int foo() {
 // IR-PCH-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
 // IR-PCH-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
 // IR-PCH-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// IR-PCH-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// IR-PCH-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// IR-PCH-NEXT:      i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// IR-PCH-NEXT:      i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // IR-PCH-NEXT:    ]
 // IR-PCH:       .omp.reduction.case1:
 // IR-PCH-NEXT:    [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
diff --git a/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp
index 4cf8f88b4c08..49df83c9c765 100644
--- a/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp
@@ -242,7 +242,7 @@ int main (int argc, char **argv) {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -281,7 +281,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087
 // CHECK1-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -476,7 +476,7 @@ int main (int argc, char **argv) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -513,7 +513,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087
 // CHECK3-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -835,7 +835,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.loop.exit:
 // CHECK9-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP26]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    ret void
@@ -917,7 +917,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK9-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// CHECK9-NEXT:    call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
 // CHECK9-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
 // CHECK9-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
 // CHECK9-NEXT:    [[CMP13:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
@@ -1124,7 +1124,7 @@ int main (int argc, char **argv) {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK9-NEXT:    ret void
 //
 //
@@ -1487,7 +1487,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]])
 // CHECK11-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK11:       omp.precond.end:
 // CHECK11-NEXT:    ret void
@@ -1571,7 +1571,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK11-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
-// CHECK11-NEXT:    call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// CHECK11-NEXT:    call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
 // CHECK11-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
 // CHECK11-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
 // CHECK11-NEXT:    [[CMP15:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
@@ -1774,7 +1774,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.end:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
-// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK11-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp
index ab5cbdf1b8c9..5ef729f044e0 100644
--- a/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp
+++ b/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp
@@ -382,7 +382,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
 // CHECK1-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK1-NEXT:    [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2
@@ -443,7 +443,7 @@ int main() {
 // CHECK1-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
 // CHECK1-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -690,7 +690,7 @@ int main() {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
 // CHECK1-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK1-NEXT:    [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2
@@ -1113,7 +1113,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]])
 // CHECK3-NEXT:    call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2
@@ -1172,7 +1172,7 @@ int main() {
 // CHECK3-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
 // CHECK3-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
 // CHECK3-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -1415,7 +1415,7 @@ int main() {
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]])
 // CHECK3-NEXT:    call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2
@@ -1793,7 +1793,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK9-NEXT:    ret void
 //
 //
@@ -1835,7 +1835,7 @@ int main() {
 // CHECK9-NEXT:    store ptr [[G1]], ptr [[_TMP3]], align 8
 // CHECK9-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK9-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK9-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
 // CHECK9-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
diff --git a/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp
index f91ee759cddf..4da49eed32ef 100644
--- a/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp
@@ -223,7 +223,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK1-NEXT:    store ptr [[SIVAR1]], ptr [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -282,7 +282,7 @@ int main() {
 // CHECK1-NEXT:    store i32 0, ptr [[SIVAR2]], align 4
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
 // CHECK1-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -516,7 +516,7 @@ int main() {
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK1-NEXT:    store ptr [[T_VAR1]], ptr [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -806,7 +806,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[SIVAR1]], ptr [[TMP12]], align 4
 // CHECK3-NEXT:    [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -863,7 +863,7 @@ int main() {
 // CHECK3-NEXT:    store i32 0, ptr [[SIVAR1]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
 // CHECK3-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -1095,7 +1095,7 @@ int main() {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[T_VAR1]], ptr [[TMP12]], align 4
 // CHECK3-NEXT:    [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -1336,7 +1336,7 @@ int main() {
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK9-NEXT:    store ptr [[SIVAR1]], ptr [[TMP14]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -1396,7 +1396,7 @@ int main() {
 // CHECK9-NEXT:    store i32 0, ptr [[SIVAR2]], align 4
 // CHECK9-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK9-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK9-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK9-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK9-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
 // CHECK9-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-- 
cgit v1.2.3


From ffe41819e58365dfbe85a22556c0d9d284e746b9 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 12 Mar 2024 12:10:30 +0000
Subject: [Support] Add KnownBits::abds signed absolute difference and rename
 absdiff -> abdu (#84897)

When I created KnownBits::absdiff, I totally missed that we already have ISD::ABDS/ABDU nodes, and we use this term in other places/targets as well.

I've added the KnownBits::abds implementation and renamed KnownBits::absdiff to KnownBits::abdu.

Followup to #84791
---
 llvm/include/llvm/Support/KnownBits.h    |  7 +++--
 llvm/lib/Support/KnownBits.cpp           | 23 +++++++++++++++--
 llvm/lib/Target/X86/X86ISelLowering.cpp  |  2 +-
 llvm/unittests/Support/KnownBitsTest.cpp | 44 +++++++++++++++++++++++++++-----
 4 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h
index 06d2c90f7b0f..73cb01e0644a 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -390,8 +390,11 @@ public:
   /// Compute known bits for smin(LHS, RHS).
   static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS);
 
-  /// Compute known bits for absdiff(LHS, RHS).
-  static KnownBits absdiff(const KnownBits &LHS, const KnownBits &RHS);
+  /// Compute known bits for abdu(LHS, RHS).
+  static KnownBits abdu(const KnownBits &LHS, const KnownBits &RHS);
+
+  /// Compute known bits for abds(LHS, RHS).
+  static KnownBits abds(const KnownBits &LHS, const KnownBits &RHS);
 
   /// Compute known bits for shl(LHS, RHS).
   /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index c33c3680825a..d72355dab6f1 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -231,8 +231,8 @@ KnownBits KnownBits::smin(const KnownBits &LHS, const KnownBits &RHS) {
   return Flip(umax(Flip(LHS), Flip(RHS)));
 }
 
-KnownBits KnownBits::absdiff(const KnownBits &LHS, const KnownBits &RHS) {
-  // absdiff(LHS,RHS) = sub(umax(LHS,RHS), umin(LHS,RHS)).
+KnownBits KnownBits::abdu(const KnownBits &LHS, const KnownBits &RHS) {
+  // abdu(LHS,RHS) = sub(umax(LHS,RHS), umin(LHS,RHS)).
   KnownBits UMaxValue = umax(LHS, RHS);
   KnownBits UMinValue = umin(LHS, RHS);
   KnownBits MinMaxDiff = computeForAddSub(/*Add=*/false, /*NSW=*/false,
@@ -250,6 +250,25 @@ KnownBits KnownBits::absdiff(const KnownBits &LHS, const KnownBits &RHS) {
   return KnownAbsDiff;
 }
 
+KnownBits KnownBits::abds(const KnownBits &LHS, const KnownBits &RHS) {
+  // abds(LHS,RHS) = sub(smax(LHS,RHS), smin(LHS,RHS)).
+  KnownBits SMaxValue = smax(LHS, RHS);
+  KnownBits SMinValue = smin(LHS, RHS);
+  KnownBits MinMaxDiff = computeForAddSub(/*Add=*/false, /*NSW=*/false,
+                                          /*NUW=*/false, SMaxValue, SMinValue);
+
+  // find the common bits between sub(LHS,RHS) and sub(RHS,LHS).
+  KnownBits Diff0 =
+      computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/false, LHS, RHS);
+  KnownBits Diff1 =
+      computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/false, RHS, LHS);
+  KnownBits SubDiff = Diff0.intersectWith(Diff1);
+
+  KnownBits KnownAbsDiff = MinMaxDiff.unionWith(SubDiff);
+  assert(!KnownAbsDiff.hasConflict() && "Bad Output");
+  return KnownAbsDiff;
+}
+
 static unsigned getMaxShiftAmount(const APInt &MaxValue, unsigned BitWidth) {
   if (isPowerOf2_32(BitWidth))
     return MaxValue.extractBitsAsZExtValue(Log2_32(BitWidth), 0);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a74901958ac0..b4d0421c14c0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36753,7 +36753,7 @@ static void computeKnownBitsForPSADBW(SDValue LHS, SDValue RHS,
   APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
   Known = DAG.computeKnownBits(RHS, DemandedSrcElts, Depth + 1);
   Known2 = DAG.computeKnownBits(LHS, DemandedSrcElts, Depth + 1);
-  Known = KnownBits::absdiff(Known, Known2).zext(16);
+  Known = KnownBits::abdu(Known, Known2).zext(16);
   // Known = (((D0 + D1) + (D2 + D3)) + ((D4 + D5) + (D6 + D7)))
   Known = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/true, /*NUW=*/true,
                                       Known, Known);
diff --git a/llvm/unittests/Support/KnownBitsTest.cpp b/llvm/unittests/Support/KnownBitsTest.cpp
index 2ac25f0b2801..d3177ce7e983 100644
--- a/llvm/unittests/Support/KnownBitsTest.cpp
+++ b/llvm/unittests/Support/KnownBitsTest.cpp
@@ -294,18 +294,18 @@ TEST(KnownBitsTest, SignBitUnknown) {
   EXPECT_TRUE(Known.isSignUnknown());
 }
 
-TEST(KnownBitsTest, AbsDiffSpecialCase) {
-  // There are 2 implementation of absdiff - both are currently needed to cover
+TEST(KnownBitsTest, ABDUSpecialCase) {
+  // There are 2 implementations of abdu - both are currently needed to cover
   // extra cases.
   KnownBits LHS, RHS, Res;
 
-  // absdiff(LHS,RHS) = sub(umax(LHS,RHS), umin(LHS,RHS)).
+  // abdu(LHS,RHS) = sub(umax(LHS,RHS), umin(LHS,RHS)).
   // Actual: false (Inputs = 1011, 101?, Computed = 000?, Exact = 000?)
   LHS.One = APInt(4, 0b1011);
   RHS.One = APInt(4, 0b1010);
   LHS.Zero = APInt(4, 0b0100);
   RHS.Zero = APInt(4, 0b0100);
-  Res = KnownBits::absdiff(LHS, RHS);
+  Res = KnownBits::abdu(LHS, RHS);
   EXPECT_EQ(0b0000ul, Res.One.getZExtValue());
   EXPECT_EQ(0b1110ul, Res.Zero.getZExtValue());
 
@@ -315,11 +315,37 @@ TEST(KnownBitsTest, AbsDiffSpecialCase) {
   RHS.One = APInt(4, 0b1000);
   LHS.Zero = APInt(4, 0b0000);
   RHS.Zero = APInt(4, 0b0111);
-  Res = KnownBits::absdiff(LHS, RHS);
+  Res = KnownBits::abdu(LHS, RHS);
   EXPECT_EQ(0b0001ul, Res.One.getZExtValue());
   EXPECT_EQ(0b0000ul, Res.Zero.getZExtValue());
 }
 
+TEST(KnownBitsTest, ABDSSpecialCase) {
+  // There are 2 implementations of abds - both are currently needed to cover
+  // extra cases.
+  KnownBits LHS, RHS, Res;
+
+  // abds(LHS,RHS) = sub(smax(LHS,RHS), smin(LHS,RHS)).
+  // Actual: false (Inputs = 1011, 10??, Computed = ????, Exact = 00??)
+  LHS.One = APInt(4, 0b1011);
+  RHS.One = APInt(4, 0b1000);
+  LHS.Zero = APInt(4, 0b0100);
+  RHS.Zero = APInt(4, 0b0100);
+  Res = KnownBits::abds(LHS, RHS);
+  EXPECT_EQ(0, Res.One.getSExtValue());
+  EXPECT_EQ(-4, Res.Zero.getSExtValue());
+
+  // find the common bits between sub(LHS,RHS) and sub(RHS,LHS).
+  // Actual: false (Inputs = ???1, 1000, Computed = ???1, Exact = 0??1)
+  LHS.One = APInt(4, 0b0001);
+  RHS.One = APInt(4, 0b1000);
+  LHS.Zero = APInt(4, 0b0000);
+  RHS.Zero = APInt(4, 0b0111);
+  Res = KnownBits::abds(LHS, RHS);
+  EXPECT_EQ(1, Res.One.getSExtValue());
+  EXPECT_EQ(0, Res.Zero.getSExtValue());
+}
+
 TEST(KnownBitsTest, BinaryExhaustive) {
   testBinaryOpExhaustive(
       [](const KnownBits &Known1, const KnownBits &Known2) {
@@ -359,10 +385,16 @@ TEST(KnownBitsTest, BinaryExhaustive) {
       [](const APInt &N1, const APInt &N2) { return APIntOps::smin(N1, N2); });
   testBinaryOpExhaustive(
       [](const KnownBits &Known1, const KnownBits &Known2) {
-        return KnownBits::absdiff(Known1, Known2);
+        return KnownBits::abdu(Known1, Known2);
       },
       [](const APInt &N1, const APInt &N2) { return APIntOps::abdu(N1, N2); },
       checkCorrectnessOnlyBinary);
+  testBinaryOpExhaustive(
+      [](const KnownBits &Known1, const KnownBits &Known2) {
+        return KnownBits::abds(Known1, Known2);
+      },
+      [](const APInt &N1, const APInt &N2) { return APIntOps::abds(N1, N2); },
+      checkCorrectnessOnlyBinary);
   testBinaryOpExhaustive(
       [](const KnownBits &Known1, const KnownBits &Known2) {
         return KnownBits::udiv(Known1, Known2);
-- 
cgit v1.2.3


From b5a16b6d8ad51df7c14cd696f3dc1f98b6984905 Mon Sep 17 00:00:00 2001
From: Sirraide <aeternalmail@gmail.com>
Date: Tue, 12 Mar 2024 13:42:43 +0100
Subject: [Clang] [Parser] Support [[omp::assume]] (#84582)

This pr implements the `[[omp::assume]]` spelling for the `__attribute__((assume))` attribute. It does not change anything about how that attribute is handled by the rest of Clang.
---
 clang/docs/ReleaseNotes.rst       |  5 +++++
 clang/include/clang/Basic/Attr.td |  2 +-
 clang/lib/Basic/Attributes.cpp    |  8 ++++++--
 clang/lib/Parse/ParseDeclCXX.cpp  |  4 +++-
 clang/test/OpenMP/attr-assume.cpp | 13 +++++++++++++
 5 files changed, 28 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/OpenMP/attr-assume.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4a08b78d78b6..6c30af304d98 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -500,6 +500,11 @@ Python Binding Changes
 
 - Exposed `CXRewriter` API as `class Rewriter`.
 
+OpenMP Support
+--------------
+
+- Added support for the `[[omp::assume]]` attribute.
+
 Additional Information
 ======================
 
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index fd7970d0451a..080340669b60 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -4159,7 +4159,7 @@ def OMPDeclareVariant : InheritableAttr {
 }
 
 def OMPAssume : InheritableAttr {
-  let Spellings = [Clang<"assume">];
+  let Spellings = [Clang<"assume">, CXX11<"omp", "assume">];
   let Subjects = SubjectList<[Function, ObjCMethod]>;
   let InheritEvenIfAlreadyPresent = 1;
   let Documentation = [OMPAssumeDocs];
diff --git a/clang/lib/Basic/Attributes.cpp b/clang/lib/Basic/Attributes.cpp
index 44a4f1890d39..867d241a2cf8 100644
--- a/clang/lib/Basic/Attributes.cpp
+++ b/clang/lib/Basic/Attributes.cpp
@@ -47,8 +47,12 @@ int clang::hasAttribute(AttributeCommonInfo::Syntax Syntax,
   // attributes. We support those, but not through the typical attribute
   // machinery that goes through TableGen. We support this in all OpenMP modes
   // so long as double square brackets are enabled.
-  if (LangOpts.OpenMP && ScopeName == "omp")
-    return (Name == "directive" || Name == "sequence") ? 1 : 0;
+  //
+  // Other OpenMP attributes (e.g. [[omp::assume]]) are handled via the
+  // regular attribute parsing machinery.
+  if (LangOpts.OpenMP && ScopeName == "omp" &&
+      (Name == "directive" || Name == "sequence"))
+    return 1;
 
   int res = hasAttributeImpl(Syntax, Name, ScopeName, Target, LangOpts);
   if (res)
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index bdca10c4c7c0..77d2382ea6d9 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -4634,7 +4634,9 @@ bool Parser::ParseCXX11AttributeArgs(
     return true;
   }
 
-  if (ScopeName && ScopeName->isStr("omp")) {
+  // [[omp::directive]] and [[omp::sequence]] need special handling.
+  if (ScopeName && ScopeName->isStr("omp") &&
+      (AttrName->isStr("directive") || AttrName->isStr("sequence"))) {
     Diag(AttrNameLoc, getLangOpts().OpenMP >= 51
                           ? diag::warn_omp51_compat_attributes
                           : diag::ext_omp_attributes);
diff --git a/clang/test/OpenMP/attr-assume.cpp b/clang/test/OpenMP/attr-assume.cpp
new file mode 100644
index 000000000000..09c22f98d1e2
--- /dev/null
+++ b/clang/test/OpenMP/attr-assume.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -verify %s
+[[omp::assume(3)]] void f1(); // expected-error {{expected string literal as argument of 'assume' attribute}}
+[[omp::assume(int)]] void f2(); // expected-error {{expected string literal as argument of 'assume' attribute}}
+[[omp::assume(for)]] void f3(); // expected-error {{expected string literal as argument of 'assume' attribute}}
+[[omp::assume("QQQQ")]] void f4(); // expected-warning {{unknown assumption string 'QQQQ'; attribute is potentially ignored}}
+[[omp::assume("omp_no_openmp")]] void f5();
+[[omp::assume("omp_noopenmp")]] void f6(); // expected-warning {{unknown assumption string 'omp_noopenmp' may be misspelled; attribute is potentially ignored, did you mean 'omp_no_openmp'?}}
+[[omp::assume("omp_no_openmp_routine")]] void f7(); // expected-warning {{unknown assumption string 'omp_no_openmp_routine' may be misspelled; attribute is potentially ignored, did you mean 'omp_no_openmp_routines'?}}
+[[omp::assume("omp_no_openmp1")]] void f8(); // expected-warning {{unknown assumption string 'omp_no_openmp1' may be misspelled; attribute is potentially ignored, did you mean 'omp_no_openmp'?}}
+[[omp::assume("omp_no_openmp", "omp_no_openmp")]] void f9(); // expected-error {{'assume' attribute takes one argument}}
+
+[[omp::assume(3)]] int g1; // expected-error {{expected string literal as argument of 'assume' attribute}}
+[[omp::assume("omp_no_openmp")]] int g2; // expected-warning {{'assume' attribute only applies to functions and Objective-C methods}}
-- 
cgit v1.2.3


From 80ab8234ac309418637488b97e0a62d8377b2ecf Mon Sep 17 00:00:00 2001
From: NagyDonat <donat.nagy@ericsson.com>
Date: Tue, 12 Mar 2024 13:51:12 +0100
Subject: [analyzer] Accept C library functions from the `std` namespace
 (#84469)

Previously, the function `isCLibraryFunction()` and logic relying on it
only accepted functions that are declared directly within a TU (i.e. not
in a namespace or a class). However C++ headers like <cstdlib> declare
many C standard library functions within the namespace `std`, so this
commit ensures that functions within the namespace `std` are also
accepted.

After this commit it will be possible to match functions like `malloc`
or `free` with `CallDescription::Mode::CLibrary`.

---------

Co-authored-by: Balazs Benics <benicsbalazs@gmail.com>
---
 .../Core/PathSensitive/CallDescription.h           |  8 +-
 clang/lib/StaticAnalyzer/Core/CheckerContext.cpp   |  8 +-
 clang/unittests/StaticAnalyzer/CMakeLists.txt      |  1 +
 .../StaticAnalyzer/IsCLibraryFunctionTest.cpp      | 89 ++++++++++++++++++++++
 .../clang/unittests/StaticAnalyzer/BUILD.gn        |  1 +
 5 files changed, 98 insertions(+), 9 deletions(-)
 create mode 100644 clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
index 3432d2648633..b4e1636130ca 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
@@ -41,12 +41,8 @@ public:
     ///  - We also accept calls where the number of arguments or parameters is
     ///    greater than the specified value.
     /// For the exact heuristics, see CheckerContext::isCLibraryFunction().
-    /// Note that functions whose declaration context is not a TU (e.g.
-    /// methods, functions in namespaces) are not accepted as C library
-    /// functions.
-    /// FIXME: If I understand it correctly, this discards calls where C++ code
-    /// refers a C library function through the namespace `std::` via headers
-    /// like <cstdlib>.
+    /// (This mode only matches functions that are declared either directly
+    /// within a TU or in the namespace `std`.)
     CLibrary,
 
     /// Matches "simple" functions that are not methods. (Static methods are
diff --git a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
index d6d4cec9dd3d..1a9bff529e9b 100644
--- a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
@@ -87,9 +87,11 @@ bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD,
   if (!II)
     return false;
 
-  // Look through 'extern "C"' and anything similar invented in the future.
-  // If this function is not in TU directly, it is not a C library function.
-  if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit())
+  // C library functions are either declared directly within a TU (the common
+  // case) or they are accessed through the namespace `std` (when they are used
+  // in C++ via headers like <cstdlib>).
+  const DeclContext *DC = FD->getDeclContext()->getRedeclContext();
+  if (!(DC->isTranslationUnit() || DC->isStdNamespace()))
     return false;
 
   // If this function is not externally visible, it is not a C library function.
diff --git a/clang/unittests/StaticAnalyzer/CMakeLists.txt b/clang/unittests/StaticAnalyzer/CMakeLists.txt
index 775f0f8486b8..db56e77331b8 100644
--- a/clang/unittests/StaticAnalyzer/CMakeLists.txt
+++ b/clang/unittests/StaticAnalyzer/CMakeLists.txt
@@ -11,6 +11,7 @@ add_clang_unittest(StaticAnalysisTests
   CallEventTest.cpp
   ConflictingEvalCallsTest.cpp
   FalsePositiveRefutationBRVisitorTest.cpp
+  IsCLibraryFunctionTest.cpp
   NoStateChangeFuncVisitorTest.cpp
   ParamRegionTest.cpp
   RangeSetTest.cpp
diff --git a/clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp b/clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp
new file mode 100644
index 000000000000..19c66cc6bee1
--- /dev/null
+++ b/clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp
@@ -0,0 +1,89 @@
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Frontend/ASTUnit.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/Tooling/Tooling.h"
+#include "gtest/gtest.h"
+
+#include <memory>
+
+using namespace clang;
+using namespace ento;
+using namespace ast_matchers;
+
+testing::AssertionResult extractFunctionDecl(StringRef Code,
+                                             const FunctionDecl *&Result) {
+  auto ASTUnit = tooling::buildASTFromCode(Code);
+  if (!ASTUnit)
+    return testing::AssertionFailure() << "AST construction failed";
+
+  ASTContext &Context = ASTUnit->getASTContext();
+  if (Context.getDiagnostics().hasErrorOccurred())
+    return testing::AssertionFailure() << "Compilation error";
+
+  auto Matches = ast_matchers::match(functionDecl().bind("fn"), Context);
+  if (Matches.empty())
+    return testing::AssertionFailure() << "No function declaration found";
+
+  if (Matches.size() > 1)
+    return testing::AssertionFailure()
+           << "Multiple function declarations found";
+
+  Result = Matches[0].getNodeAs<FunctionDecl>("fn");
+  return testing::AssertionSuccess();
+}
+
+TEST(IsCLibraryFunctionTest, AcceptsGlobal) {
+  const FunctionDecl *Result;
+  ASSERT_TRUE(extractFunctionDecl(R"cpp(void fun();)cpp", Result));
+  EXPECT_TRUE(CheckerContext::isCLibraryFunction(Result));
+}
+
+TEST(IsCLibraryFunctionTest, AcceptsExternCGlobal) {
+  const FunctionDecl *Result;
+  ASSERT_TRUE(
+      extractFunctionDecl(R"cpp(extern "C" { void fun(); })cpp", Result));
+  EXPECT_TRUE(CheckerContext::isCLibraryFunction(Result));
+}
+
+TEST(IsCLibraryFunctionTest, RejectsNoInlineNoExternalLinkage) {
+  // Functions that are neither inlined nor externally visible cannot be C library functions.
+  const FunctionDecl *Result;
+  ASSERT_TRUE(extractFunctionDecl(R"cpp(static void fun();)cpp", Result));
+  EXPECT_FALSE(CheckerContext::isCLibraryFunction(Result));
+}
+
+TEST(IsCLibraryFunctionTest, RejectsAnonymousNamespace) {
+  const FunctionDecl *Result;
+  ASSERT_TRUE(
+      extractFunctionDecl(R"cpp(namespace { void fun(); })cpp", Result));
+  EXPECT_FALSE(CheckerContext::isCLibraryFunction(Result));
+}
+
+TEST(IsCLibraryFunctionTest, AcceptsStdNamespace) {
+  const FunctionDecl *Result;
+  ASSERT_TRUE(
+      extractFunctionDecl(R"cpp(namespace std { void fun(); })cpp", Result));
+  EXPECT_TRUE(CheckerContext::isCLibraryFunction(Result));
+}
+
+TEST(IsCLibraryFunctionTest, RejectsOtherNamespaces) {
+  const FunctionDecl *Result;
+  ASSERT_TRUE(
+      extractFunctionDecl(R"cpp(namespace stdx { void fun(); })cpp", Result));
+  EXPECT_FALSE(CheckerContext::isCLibraryFunction(Result));
+}
+
+TEST(IsCLibraryFunctionTest, RejectsClassStatic) {
+  const FunctionDecl *Result;
+  ASSERT_TRUE(
+      extractFunctionDecl(R"cpp(class A { static void fun(); };)cpp", Result));
+  EXPECT_FALSE(CheckerContext::isCLibraryFunction(Result));
+}
+
+TEST(IsCLibraryFunctionTest, RejectsClassMember) {
+  const FunctionDecl *Result;
+  ASSERT_TRUE(extractFunctionDecl(R"cpp(class A { void fun(); };)cpp", Result));
+  EXPECT_FALSE(CheckerContext::isCLibraryFunction(Result));
+}
diff --git a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn
index 01c2b6ced336..9c240cff1816 100644
--- a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn
@@ -19,6 +19,7 @@ unittest("StaticAnalysisTests") {
     "CallEventTest.cpp",
     "ConflictingEvalCallsTest.cpp",
     "FalsePositiveRefutationBRVisitorTest.cpp",
+    "IsCLibraryFunctionTest.cpp",
     "NoStateChangeFuncVisitorTest.cpp",
     "ParamRegionTest.cpp",
     "RangeSetTest.cpp",
-- 
cgit v1.2.3


From 0b2c24e0b33236c1dec39fe8b007b4c6aeb170b3 Mon Sep 17 00:00:00 2001
From: Zahira Ammarguellat <zahira.ammarguellat@intel.com>
Date: Tue, 12 Mar 2024 05:51:38 -0700
Subject: Fix warning message when using negative complex range options.
 (#84567)

When `-fcx-no-limited-range` or` -fno-cx-fortran-rules` follows another
complex range option on the command line, it will trigger a warning with
empty message.
`warning: overriding '-fcx-fortran-rules' option with ''
[-Woverriding-option]`
or
`warning: overriding '-fcx-limited-range' option with ''
[-Woverriding-option]`
This patch fixes that.
---
 clang/lib/Driver/ToolChains/Clang.cpp | 35 ++++++++++++++++++++++++++---------
 clang/test/Driver/range.c             | 23 ++++++++++++++++++-----
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index cc568b9a715b..6246a28a1306 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2687,8 +2687,8 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
   }
 }
 
-static StringRef EnumComplexRangeToStr(LangOptions::ComplexRangeKind Range) {
-  StringRef RangeStr = "";
+static StringRef EnumComplexRangeToStr(LangOptions::ComplexRangeKind Range,
+                                       StringRef Option) {
   switch (Range) {
   case LangOptions::ComplexRangeKind::CX_Limited:
     return "-fcx-limited-range";
@@ -2697,17 +2697,32 @@ static StringRef EnumComplexRangeToStr(LangOptions::ComplexRangeKind Range) {
     return "-fcx-fortran-rules";
     break;
   default:
-    return RangeStr;
+    return Option;
     break;
   }
 }
 
 static void EmitComplexRangeDiag(const Driver &D,
                                  LangOptions::ComplexRangeKind Range1,
-                                 LangOptions::ComplexRangeKind Range2) {
-  if (Range1 != Range2 && Range1 != LangOptions::ComplexRangeKind::CX_None)
-    D.Diag(clang::diag::warn_drv_overriding_option)
-        << EnumComplexRangeToStr(Range1) << EnumComplexRangeToStr(Range2);
+                                 LangOptions::ComplexRangeKind Range2,
+                                 StringRef Option = StringRef()) {
+  if (Range1 != Range2 && Range1 != LangOptions::ComplexRangeKind::CX_None) {
+    bool NegateFortranOption = false;
+    bool NegateLimitedOption = false;
+    if (!Option.empty()) {
+      NegateFortranOption =
+          Range1 == LangOptions::ComplexRangeKind::CX_Fortran &&
+          Option == "-fno-cx-fortran-rules";
+      NegateLimitedOption =
+          Range1 == LangOptions::ComplexRangeKind::CX_Limited &&
+          Option == "-fno-cx-limited-range";
+    }
+    if (Option.empty() ||
+        !Option.empty() && !NegateFortranOption && !NegateLimitedOption)
+      D.Diag(clang::diag::warn_drv_overriding_option)
+          << EnumComplexRangeToStr(Range1, Option)
+          << EnumComplexRangeToStr(Range2, Option);
+  }
 }
 
 static std::string
@@ -2815,7 +2830,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
       break;
     }
     case options::OPT_fno_cx_limited_range:
-      EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full);
+      EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full,
+                           "-fno-cx-limited-range");
       Range = LangOptions::ComplexRangeKind::CX_Full;
       break;
     case options::OPT_fcx_fortran_rules: {
@@ -2824,7 +2840,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
       break;
     }
     case options::OPT_fno_cx_fortran_rules:
-      EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full);
+      EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full,
+                           "-fno-cx-fortran-rules");
       Range = LangOptions::ComplexRangeKind::CX_Full;
       break;
     case options::OPT_ffp_model_EQ: {
diff --git a/clang/test/Driver/range.c b/clang/test/Driver/range.c
index 49116df2f448..2d1fd7f9f1a9 100644
--- a/clang/test/Driver/range.c
+++ b/clang/test/Driver/range.c
@@ -12,12 +12,23 @@
 // RUN: %clang -### -target x86_64 -fcx-fortran-rules -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=FRTRN %s
 
+// RUN: %clang -### -target x86_64 -fcx-fortran-rules -c %s 2>&1 \
+// RUN:   -fno-cx-fortran-rules | FileCheck --check-prefix=FULL %s
+
+// RUN: %clang -### -target x86_64 -fcx-fortran-rules -fno-cx-limited-range \
+// RUN: -c %s 2>&1 | FileCheck --check-prefix=WARN3 %s
+
 // RUN: %clang -### -target x86_64 -fno-cx-fortran-rules -c %s 2>&1 \
 // RUN:   | FileCheck  %s
 
-// RUN: %clang -### -target x86_64 -fcx-limited-range \
-// RUN: -fcx-fortran-rules -c %s 2>&1 \
-// RUN:   | FileCheck --check-prefix=WARN1 %s
+// RUN: %clang -### -target x86_64 -fcx-limited-range -fcx-fortran-rules \
+// RUN: -c %s 2>&1 | FileCheck --check-prefix=WARN1 %s
+
+// RUN: %clang -### -target x86_64 -fcx-limited-range -fno-cx-fortran-rules \
+// RUN: -c %s 2>&1 | FileCheck --check-prefix=WARN4 %s
+
+// RUN: %clang -### -target x86_64 -fcx-limited-range -fno-cx-limited-range \
+// RUN: -c %s 2>&1 | FileCheck --check-prefix=FULL %s
 
 // RUN: %clang -### -target x86_64 -fcx-fortran-rules \
 // RUN: -fcx-limited-range  -c %s 2>&1 \
@@ -32,8 +43,8 @@
 // RUN: %clang -### -target x86_64 -fcx-limited-range -ffast-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=LMTD %s
 
-// RUN: %clang -### -target x86_64 -ffast-math -fno-cx-limited-range -c %s 2>&1 \
-// RUN:   | FileCheck --check-prefix=FULL %s
+// RUN: %clang -### -target x86_64 -ffast-math -fno-cx-limited-range \
+// RUN: -c %s 2>&1 | FileCheck --check-prefix=FULL %s
 
 // RUN: %clang -### -Werror -target x86_64 -fcx-limited-range -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=LMTD %s
@@ -50,3 +61,5 @@
 // CHECK-NOT: -complex-range=fortran
 // WARN1: warning: overriding '-fcx-limited-range' option with '-fcx-fortran-rules' [-Woverriding-option]
 // WARN2: warning: overriding '-fcx-fortran-rules' option with '-fcx-limited-range' [-Woverriding-option]
+// WARN3: warning: overriding '-fcx-fortran-rules' option with '-fno-cx-limited-range' [-Woverriding-option]
+// WARN4: warning: overriding '-fcx-limited-range' option with '-fno-cx-fortran-rules' [-Woverriding-option]
-- 
cgit v1.2.3


From 629afd4dea1860755b4b7c05bb48538b8710c9d1 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek@amd.com>
Date: Tue, 12 Mar 2024 07:52:48 -0500
Subject: [flang][Lower] Fix use-after-free with TypeRange (#84369)

TypeRange is an iterator range, it does not own storage spanned by the
iterators. When using TypeRange, make sure that the actual contents
don't "expire" while the range is in use.

This was detected by address sanitizer.
---
 flang/lib/Lower/IO.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp
index 699897adcd0b..ac82276bcddb 100644
--- a/flang/lib/Lower/IO.cpp
+++ b/flang/lib/Lower/IO.cpp
@@ -242,8 +242,11 @@ static void makeNextConditionalOn(fir::FirOpBuilder &builder,
   // is in a fir.iterate_while loop, the result must be propagated up to the
   // loop scope as an extra ifOp result. (The propagation is done in genIoLoop.)
   mlir::TypeRange resTy;
+  // TypeRange does not own its contents, so make sure the the type object
+  // is live until the end of the function.
+  mlir::IntegerType boolTy = builder.getI1Type();
   if (inLoop)
-    resTy = builder.getI1Type();
+    resTy = boolTy;
   auto ifOp = builder.create<fir::IfOp>(loc, resTy, ok,
                                         /*withElseRegion=*/inLoop);
   builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-- 
cgit v1.2.3


From c7f1a987a66a1ba0865ecc18adbe4dc8dbc0c788 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Tue, 12 Mar 2024 12:51:30 +0000
Subject: [OpenCL] Elaborate about BIenqueue_kernel expansion; NFC

---
 clang/lib/CodeGen/CGBuiltin.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 20c357579391..93ab46507977 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5460,7 +5460,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   }
 
   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
-  // It contains four different overload formats specified in Table 6.13.17.1.
+  // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
+  // The code below expands the builtin call to a call to one of the following
+  // functions that an OpenCL runtime library will have to provide:
+  //   __enqueue_kernel_basic
+  //   __enqueue_kernel_varargs
+  //   __enqueue_kernel_basic_events
+  //   __enqueue_kernel_events_varargs
   case Builtin::BIenqueue_kernel: {
     StringRef Name; // Generated function call name
     unsigned NumArgs = E->getNumArgs();
-- 
cgit v1.2.3


From 871086bf7fad42d610bfe02224662bdc71494a70 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek@amd.com>
Date: Tue, 12 Mar 2024 07:53:57 -0500
Subject: [flang] Avoid passing null pointers to nonnull parameters (#84785)

Certain functions in glibc have "nonnull" attributes on pointer
parameters (even in cases where passing a null pointer should be handled
correctly). There are a few cases of such calls in flang: memcmp and
memcpy with the length parameter set to 0.

Avoid passing a null pointer to these functions, since the conflict with
the nonnull attribute could cause an undefined behavior.

This was detected by the undefined behavior sanitizer.
---
 flang/include/flang/Parser/char-block.h |  7 +++++++
 flang/runtime/buffer.h                  | 11 ++++++++---
 flang/runtime/temporary-stack.cpp       |  9 +++++++--
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/flang/include/flang/Parser/char-block.h b/flang/include/flang/Parser/char-block.h
index fc5de2607b51..acd8aee98bf8 100644
--- a/flang/include/flang/Parser/char-block.h
+++ b/flang/include/flang/Parser/char-block.h
@@ -129,6 +129,13 @@ public:
 
 private:
   int Compare(const CharBlock &that) const {
+    // "memcmp" in glibc has "nonnull" attributes on the input pointers.
+    // Avoid passing null pointers, since it would result in an undefined
+    // behavior.
+    if (size() == 0)
+      return that.size() == 0 ? 0 : -1;
+    if (that.size() == 0)
+      return 1;
     std::size_t bytes{std::min(size(), that.size())};
     int cmp{std::memcmp(static_cast<const void *>(begin()),
         static_cast<const void *>(that.begin()), bytes)};
diff --git a/flang/runtime/buffer.h b/flang/runtime/buffer.h
index a77a5a5dda5c..93fda36f500d 100644
--- a/flang/runtime/buffer.h
+++ b/flang/runtime/buffer.h
@@ -148,10 +148,15 @@ private:
       buffer_ =
           reinterpret_cast<char *>(AllocateMemoryOrCrash(terminator, size_));
       auto chunk{std::min<std::int64_t>(length_, oldSize - start_)};
-      std::memcpy(buffer_, old + start_, chunk);
+      // "memcpy" in glibc has a "nonnull" attribute on the source pointer.
+      // Avoid passing a null pointer, since it would result in an undefined
+      // behavior.
+      if (old != nullptr) {
+        std::memcpy(buffer_, old + start_, chunk);
+        std::memcpy(buffer_ + chunk, old, length_ - chunk);
+        FreeMemory(old);
+      }
       start_ = 0;
-      std::memcpy(buffer_ + chunk, old, length_ - chunk);
-      FreeMemory(old);
     }
   }
 
diff --git a/flang/runtime/temporary-stack.cpp b/flang/runtime/temporary-stack.cpp
index b4d7c6064457..667b10e04dbd 100644
--- a/flang/runtime/temporary-stack.cpp
+++ b/flang/runtime/temporary-stack.cpp
@@ -93,8 +93,13 @@ void DescriptorStorage<COPY_VALUES>::resize(size_type newCapacity) {
   }
   Descriptor **newData =
       static_cast<Descriptor **>(AllocateMemoryOrCrash(terminator_, bytes));
-  memcpy(newData, data_, capacity_ * sizeof(Descriptor *));
-  FreeMemory(data_);
+  // "memcpy" in glibc has a "nonnull" attribute on the source pointer.
+  // Avoid passing a null pointer, since it would result in an undefined
+  // behavior.
+  if (data_ != nullptr) {
+    memcpy(newData, data_, capacity_ * sizeof(Descriptor *));
+    FreeMemory(data_);
+  }
   data_ = newData;
   capacity_ = newCapacity;
 }
-- 
cgit v1.2.3


From bde7a6b791872b63456cb4e50e63046728a65196 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= <stefan.graenitz@gmail.com>
Date: Tue, 12 Mar 2024 13:55:38 +0100
Subject: [clang-repl] Expose CreateExecutor() and ResetExecutor() in extended
 Interpreter interface (#84460)

IncrementalExecutor is an implementation detail of the Interpreter. In
order to test extended features properly, we must be able to setup and
tear down the executor manually.
---
 clang/include/clang/Interpreter/Interpreter.h      |  9 +++++++-
 clang/lib/Interpreter/Interpreter.cpp              |  6 ++++++
 clang/unittests/Interpreter/CMakeLists.txt         |  1 +
 .../Interpreter/InterpreterExtensionsTest.cpp      | 24 ++++++++++++++++++++++
 4 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h
index 469ce1fd75bf..1dcba1ef9679 100644
--- a/clang/include/clang/Interpreter/Interpreter.h
+++ b/clang/include/clang/Interpreter/Interpreter.h
@@ -96,7 +96,6 @@ class Interpreter {
   // An optional parser for CUDA offloading
   std::unique_ptr<IncrementalParser> DeviceParser;
 
-  llvm::Error CreateExecutor();
   unsigned InitPTUSize = 0;
 
   // This member holds the last result of the value printing. It's a class
@@ -114,6 +113,14 @@ protected:
   // That's useful for testing and out-of-tree clients.
   Interpreter(std::unique_ptr<CompilerInstance> CI, llvm::Error &Err);
 
+  // Create the internal IncrementalExecutor, or re-create it after calling
+  // ResetExecutor().
+  llvm::Error CreateExecutor();
+
+  // Delete the internal IncrementalExecutor. This causes a hard shutdown of the
+  // JIT engine. In particular, it doesn't run cleanup or destructors.
+  void ResetExecutor();
+
   // Lazily construct the RuntimeInterfaceBuilder. The provided instance will be
   // used for the entire lifetime of the interpreter. The default implementation
   // targets the in-process __clang_Interpreter runtime. Override this to use a
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index e293fefb5249..7fa52f2f15fc 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -375,6 +375,10 @@ Interpreter::Parse(llvm::StringRef Code) {
 llvm::Error Interpreter::CreateExecutor() {
   const clang::TargetInfo &TI =
       getCompilerInstance()->getASTContext().getTargetInfo();
+  if (IncrExecutor)
+    return llvm::make_error<llvm::StringError>("Operation failed. "
+                                               "Execution engine exists",
+                                               std::error_code());
   llvm::Error Err = llvm::Error::success();
   auto Executor = std::make_unique<IncrementalExecutor>(*TSCtx, Err, TI);
   if (!Err)
@@ -383,6 +387,8 @@ llvm::Error Interpreter::CreateExecutor() {
   return Err;
 }
 
+void Interpreter::ResetExecutor() { IncrExecutor.reset(); }
+
 llvm::Error Interpreter::Execute(PartialTranslationUnit &T) {
   assert(T.TheModule);
   if (!IncrExecutor) {
diff --git a/clang/unittests/Interpreter/CMakeLists.txt b/clang/unittests/Interpreter/CMakeLists.txt
index 046d96ad0ec6..498070b43d92 100644
--- a/clang/unittests/Interpreter/CMakeLists.txt
+++ b/clang/unittests/Interpreter/CMakeLists.txt
@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
   OrcJIT
   Support
   TargetParser
+  TestingSupport
   )
 
 add_clang_unittest(ClangReplInterpreterTests
diff --git a/clang/unittests/Interpreter/InterpreterExtensionsTest.cpp b/clang/unittests/Interpreter/InterpreterExtensionsTest.cpp
index 4e9f2dba210a..f1c3d65ab0a9 100644
--- a/clang/unittests/Interpreter/InterpreterExtensionsTest.cpp
+++ b/clang/unittests/Interpreter/InterpreterExtensionsTest.cpp
@@ -27,6 +27,30 @@
 using namespace clang;
 namespace {
 
+class TestCreateResetExecutor : public Interpreter {
+public:
+  TestCreateResetExecutor(std::unique_ptr<CompilerInstance> CI,
+                          llvm::Error &Err)
+      : Interpreter(std::move(CI), Err) {}
+
+  llvm::Error testCreateExecutor() { return Interpreter::CreateExecutor(); }
+
+  void resetExecutor() { Interpreter::ResetExecutor(); }
+};
+
+TEST(InterpreterExtensionsTest, ExecutorCreateReset) {
+  clang::IncrementalCompilerBuilder CB;
+  llvm::Error ErrOut = llvm::Error::success();
+  TestCreateResetExecutor Interp(cantFail(CB.CreateCpp()), ErrOut);
+  cantFail(std::move(ErrOut));
+  cantFail(Interp.testCreateExecutor());
+  Interp.resetExecutor();
+  cantFail(Interp.testCreateExecutor());
+  EXPECT_THAT_ERROR(Interp.testCreateExecutor(),
+                    llvm::FailedWithMessage("Operation failed. "
+                                            "Execution engine exists"));
+}
+
 class RecordRuntimeIBMetrics : public Interpreter {
   struct NoopRuntimeInterfaceBuilder : public RuntimeInterfaceBuilder {
     NoopRuntimeInterfaceBuilder(Sema &S) : S(S) {}
-- 
cgit v1.2.3


From ee137e234cb70e24dbfd8903904c5d5eceda3fb0 Mon Sep 17 00:00:00 2001
From: Danial Klimkin <dklimkin@google.com>
Date: Tue, 12 Mar 2024 14:21:04 +0100
Subject: Update BUILD.bazel for 80ab8234ac309418637488b97e0a62d8377b2ecf
 (#84908)

---
 utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel
index 095efe9babfb..9823027b766c 100644
--- a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel
@@ -396,6 +396,7 @@ cc_test(
         ":static_analyzer_test_headers",
         "//clang:analysis",
         "//clang:ast",
+        "//clang:ast_matchers",
         "//clang:basic",
         "//clang:frontend",
         "//clang:static_analyzer_core",
-- 
cgit v1.2.3


From f03aaa3c0cca77c15adfbb4544f296bc0441f6fc Mon Sep 17 00:00:00 2001
From: Jie Fu <jiefu@tencent.com>
Date: Tue, 12 Mar 2024 21:24:37 +0800
Subject: [clang] Silence -Wlogical-op-parentheses in Clang.cpp (NFC)

llvm-project/clang/lib/Driver/ToolChains/Clang.cpp:2721:49:
error: '&&' within '||' [-Werror,-Wlogical-op-parentheses]
        !Option.empty() && !NegateFortranOption && !NegateLimitedOption)
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~
llvm-project/clang/lib/Driver/ToolChains/Clang.cpp:2721:49:
note: place parentheses around the '&&' expression to silence this warning
        !Option.empty() && !NegateFortranOption && !NegateLimitedOption)
                                                ^
        (                                                              )
1 error generated.
---
 clang/lib/Driver/ToolChains/Clang.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 6246a28a1306..3a7a1cf99c79 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2718,7 +2718,7 @@ static void EmitComplexRangeDiag(const Driver &D,
           Option == "-fno-cx-limited-range";
     }
     if (Option.empty() ||
-        !Option.empty() && !NegateFortranOption && !NegateLimitedOption)
+        (!Option.empty() && !NegateFortranOption && !NegateLimitedOption))
       D.Diag(clang::diag::warn_drv_overriding_option)
           << EnumComplexRangeToStr(Range1, Option)
           << EnumComplexRangeToStr(Range2, Option);
-- 
cgit v1.2.3


From d96d917f38ab111adf7b1d24616a939e75141a89 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Tue, 12 Mar 2024 13:23:05 +0000
Subject: [Matrix] Add tests showing mis-compile with lifetime.end and fusion.

Add a set of tests showing miscompiles due to multiply fusion
introducing loads to dead objects after lifetime.end.
---
 .../multiply-fused-lifetime-ends.ll                | 707 +++++++++++++++++++++
 1 file changed, 707 insertions(+)
 create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-lifetime-ends.ll

diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-lifetime-ends.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-lifetime-ends.ll
new file mode 100644
index 000000000000..ef8665b79690
--- /dev/null
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-lifetime-ends.ll
@@ -0,0 +1,707 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=lower-matrix-intrinsics -fuse-matrix-tile-size=2 -matrix-allow-contract -force-fuse-matrix %s -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Tests to make sure no loads are introduced after a lifetime.end by multiply
+; fusion.
+
+; FIXME: Currently the tests are mis-compiled, with loads being introduced after
+;       llvm.lifetime.end calls.
+
+define void @lifetime_for_first_arg_before_multiply(ptr noalias %B, ptr noalias %C) {
+; CHECK-LABEL: @lifetime_for_first_arg_before_multiply(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca <4 x double>, align 32
+; CHECK-NEXT:    call void @init(ptr [[A]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A]])
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A]], i64 0
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B:%.*]], i64 0
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK5]], <1 x double> [[SPLAT_SPLAT7]], <1 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK12]], <1 x double> [[SPLAT_SPLAT14]], <1 x double> [[TMP9]])
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul contract <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK18]], <1 x double> [[SPLAT_SPLAT20]], <1 x double> [[TMP15]])
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP18]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul contract <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
+; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP21]])
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP24]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[C:%.*]], i64 0
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %A = alloca <4 x double>
+  call void @init(ptr %A)
+  %a = load <4 x double>, ptr %A, align 8
+  %b = load <4 x double>, ptr %B, align 8
+  call void @llvm.lifetime.end(i64 -1, ptr %A)
+  %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)
+  store <4 x double> %c, ptr %C, align 8
+  ret void
+}
+
+define void @lifetime_for_second_arg_before_multiply(ptr noalias %A, ptr noalias %C) {
+; CHECK-LABEL: @lifetime_for_second_arg_before_multiply(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca <4 x double>, align 32
+; CHECK-NEXT:    call void @init(ptr [[B]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[B]])
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A:%.*]], i64 0
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B]], i64 0
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK5]], <1 x double> [[SPLAT_SPLAT7]], <1 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK12]], <1 x double> [[SPLAT_SPLAT14]], <1 x double> [[TMP9]])
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul contract <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK18]], <1 x double> [[SPLAT_SPLAT20]], <1 x double> [[TMP15]])
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP18]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul contract <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
+; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP21]])
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP24]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[C:%.*]], i64 0
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %B = alloca <4 x double>
+  call void @init(ptr %B)
+  %a = load <4 x double>, ptr %A, align 8
+  %b = load <4 x double>, ptr %B, align 8
+  call void @llvm.lifetime.end(i64 -1, ptr %B)
+  %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)
+  store <4 x double> %c, ptr %C, align 8
+  ret void
+}
+
+define void @lifetime_for_first_arg_before_multiply_load_from_offset(ptr noalias %B, ptr noalias %C) {
+; CHECK-LABEL: @lifetime_for_first_arg_before_multiply_load_from_offset(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca <8 x double>, align 64
+; CHECK-NEXT:    call void @init(ptr [[A]])
+; CHECK-NEXT:    [[GEP_8:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A]])
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[GEP_8]], i64 0
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B:%.*]], i64 0
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK5]], <1 x double> [[SPLAT_SPLAT7]], <1 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK12]], <1 x double> [[SPLAT_SPLAT14]], <1 x double> [[TMP9]])
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul contract <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK18]], <1 x double> [[SPLAT_SPLAT20]], <1 x double> [[TMP15]])
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP18]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul contract <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
+; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP21]])
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP24]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[C:%.*]], i64 0
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %A = alloca <8 x double>
+  call void @init(ptr %A)
+  %gep.8 = getelementptr i8, ptr %A, i64 8
+  %a = load <4 x double>, ptr %gep.8, align 8
+  %b = load <4 x double>, ptr %B, align 8
+  call void @llvm.lifetime.end(i64 -1, ptr %A)
+  %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)
+  store <4 x double> %c, ptr %C, align 8
+  ret void
+}
+
+define void @lifetime_for_first_arg_before_multiply_lifetime_does_not_dominate(ptr noalias %B, ptr noalias %C, i1 %c.0) {
+; CHECK-LABEL: @lifetime_for_first_arg_before_multiply_lifetime_does_not_dominate(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca <4 x double>, align 32
+; CHECK-NEXT:    call void @init(ptr [[A]])
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A]])
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A]], i64 0
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B:%.*]], i64 0
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK5]], <1 x double> [[SPLAT_SPLAT7]], <1 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK12]], <1 x double> [[SPLAT_SPLAT14]], <1 x double> [[TMP9]])
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul contract <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK18]], <1 x double> [[SPLAT_SPLAT20]], <1 x double> [[TMP15]])
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP18]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul contract <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
+; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP21]])
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP24]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[C1:%.*]], i64 0
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %A = alloca <4 x double>
+  call void @init(ptr %A)
+  %a = load <4 x double>, ptr %A, align 8
+  %b = load <4 x double>, ptr %B, align 8
+  br i1 %c.0, label %then, label %exit
+
+then:
+  call void @llvm.lifetime.end(i64 -1, ptr %A)
+  br label %exit
+
+exit:
+  %m = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)
+  store <4 x double> %m, ptr %C, align 8
+  ret void
+}
+
+define void @lifetime_for_second_arg_before_multiply_lifetime_does_not_dominate(ptr noalias %A, ptr noalias %C, i1 %c.0) {
+; CHECK-LABEL: @lifetime_for_second_arg_before_multiply_lifetime_does_not_dominate(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca <4 x double>, align 32
+; CHECK-NEXT:    call void @init(ptr [[B]])
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[B]])
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A:%.*]], i64 0
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B]], i64 0
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK5]], <1 x double> [[SPLAT_SPLAT7]], <1 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK12]], <1 x double> [[SPLAT_SPLAT14]], <1 x double> [[TMP9]])
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul contract <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK18]], <1 x double> [[SPLAT_SPLAT20]], <1 x double> [[TMP15]])
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP18]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul contract <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
+; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP21]])
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP24]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[C1:%.*]], i64 0
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %B = alloca <4 x double>
+  call void @init(ptr %B)
+  %a = load <4 x double>, ptr %A, align 8
+  %b = load <4 x double>, ptr %B, align 8
+  br i1 %c.0, label %then, label %exit
+
+then:
+  call void @llvm.lifetime.end(i64 -1, ptr %B)
+  br label %exit
+
+exit:
+  %m = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)
+  store <4 x double> %m, ptr %C, align 8
+  ret void
+}
+
+define void @lifetime_for_ptr_first_arg_before_multiply(ptr noalias %A, ptr noalias %B, ptr noalias %C, i1 %c.0) {
+; CHECK-LABEL: @lifetime_for_ptr_first_arg_before_multiply(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A:%.*]])
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A]], i64 0
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B:%.*]], i64 0
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK5]], <1 x double> [[SPLAT_SPLAT7]], <1 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK12]], <1 x double> [[SPLAT_SPLAT14]], <1 x double> [[TMP9]])
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul contract <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK18]], <1 x double> [[SPLAT_SPLAT20]], <1 x double> [[TMP15]])
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP18]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul contract <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
+; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP21]])
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP24]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[C1:%.*]], i64 0
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = load <4 x double>, ptr %A, align 8
+  %b = load <4 x double>, ptr %B, align 8
+  br i1 %c.0, label %then, label %exit
+
+then:
+  call void @llvm.lifetime.end(i64 -1, ptr %A)
+  br label %exit
+
+exit:
+  %m = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)
+  store <4 x double> %m, ptr %C, align 8
+  ret void
+}
+
+define void @lifetime_for_both_ptr_args_before_multiply(ptr noalias %A, ptr noalias %B, ptr noalias %C, i1 %c.0) {
+; CHECK-LABEL: @lifetime_for_both_ptr_args_before_multiply(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[B:%.*]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A:%.*]])
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A]], i64 0
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B]], i64 0
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK5]], <1 x double> [[SPLAT_SPLAT7]], <1 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK12]], <1 x double> [[SPLAT_SPLAT14]], <1 x double> [[TMP9]])
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul contract <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK18]], <1 x double> [[SPLAT_SPLAT20]], <1 x double> [[TMP15]])
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP18]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul contract <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
+; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP21]])
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP24]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[C1:%.*]], i64 0
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = load <4 x double>, ptr %A, align 8
+  %b = load <4 x double>, ptr %B, align 8
+  br i1 %c.0, label %then, label %exit
+
+then:
+  call void @llvm.lifetime.end(i64 -1, ptr %B)
+  call void @llvm.lifetime.end(i64 -1, ptr %A)
+  br label %exit
+
+exit:
+  %m = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)
+  store <4 x double> %m, ptr %C, align 8
+  ret void
+}
+
+define void @lifetime_for_ptr_select_before_multiply(ptr noalias %A, ptr noalias %B, ptr noalias %C, i1 %c.0, i1 %c.1) {
+; CHECK-LABEL: @lifetime_for_ptr_select_before_multiply(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P:%.*]] = select i1 [[C_0:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]
+; CHECK-NEXT:    br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[P]])
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[P]], i64 0
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B]], i64 0
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK5]], <1 x double> [[SPLAT_SPLAT7]], <1 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK12]], <1 x double> [[SPLAT_SPLAT14]], <1 x double> [[TMP9]])
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul contract <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK18]], <1 x double> [[SPLAT_SPLAT20]], <1 x double> [[TMP15]])
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP18]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul contract <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
+; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP21]])
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP24]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[C:%.*]], i64 0
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %P = select i1 %c.0, ptr %A, ptr %B
+  %a = load <4 x double>, ptr %P, align 8
+  %b = load <4 x double>, ptr %B, align 8
+  br i1 %c.1, label %then, label %exit
+
+then:
+  call void @llvm.lifetime.end(i64 -1, ptr %P)
+  br label %exit
+
+exit:
+  %m = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)
+  store <4 x double> %m, ptr %C, align 8
+  ret void
+}
+
+declare void @init(ptr)
+declare void @llvm.lifetime.end(i64, ptr)
+
+declare <4 x double> @llvm.matrix.multiply(<4 x double>, <4 x double>, i32, i32, i32)
-- 
cgit v1.2.3


From ffd31c5e92da9da37cf57ca653e22db38b5af9a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= <stefan.graenitz@gmail.com>
Date: Tue, 12 Mar 2024 14:47:09 +0100
Subject: Fix build after #84460: link LLVMTestingSupport explicitly in clang
 unittest

This is supposed to fix the DYLIB-enabled build, i.e. https://lab.llvm.org/buildbot/#/builders/196
---
 clang/unittests/Interpreter/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/unittests/Interpreter/CMakeLists.txt b/clang/unittests/Interpreter/CMakeLists.txt
index 498070b43d92..b56e1e21015d 100644
--- a/clang/unittests/Interpreter/CMakeLists.txt
+++ b/clang/unittests/Interpreter/CMakeLists.txt
@@ -4,7 +4,6 @@ set(LLVM_LINK_COMPONENTS
   OrcJIT
   Support
   TargetParser
-  TestingSupport
   )
 
 add_clang_unittest(ClangReplInterpreterTests
@@ -20,6 +19,7 @@ target_link_libraries(ClangReplInterpreterTests PUBLIC
   clangInterpreter
   clangFrontend
   clangSema
+  LLVMTestingSupport
   )
 
 # Exceptions on Windows are not yet supported.
-- 
cgit v1.2.3


From 9f7ed36f92c304050d401f00b013186de15130e8 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1@linux.ibm.com>
Date: Tue, 12 Mar 2024 09:53:11 -0400
Subject: Don't do casting of atomic FP loads/stores in FE. (#83446)

The casting of FP atomic loads and stores were always done by the
front-end, even though the AtomicExpandPass will do it if the target
requests it (which is the default).

This patch removes this casting in the front-end entirely.
---
 clang/lib/CodeGen/CGAtomic.cpp                    |  96 ++++++++-----
 clang/test/CodeGen/SystemZ/atomic_fp_load_store.c | 164 ++++++++++++++++++++++
 clang/test/CodeGen/atomic.c                       |   3 +-
 clang/test/CodeGen/c11atomics-ios.c               |   8 +-
 clang/test/OpenMP/atomic_read_codegen.c           |  10 +-
 clang/test/OpenMP/atomic_write_codegen.c          |  13 +-
 6 files changed, 236 insertions(+), 58 deletions(-)
 create mode 100644 clang/test/CodeGen/SystemZ/atomic_fp_load_store.c

diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index a8d846b4f6a5..fb03d013e8af 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -194,12 +194,14 @@ namespace {
     RValue convertAtomicTempToRValue(Address addr, AggValueSlot resultSlot,
                                      SourceLocation loc, bool AsValue) const;
 
-    /// Converts a rvalue to integer value.
-    llvm::Value *convertRValueToInt(RValue RVal) const;
+    llvm::Value *getScalarRValValueOrNull(RValue RVal) const;
 
-    RValue ConvertIntToValueOrAtomic(llvm::Value *IntVal,
-                                     AggValueSlot ResultSlot,
-                                     SourceLocation Loc, bool AsValue) const;
+    /// Converts an rvalue to integer value if needed.
+    llvm::Value *convertRValueToInt(RValue RVal, bool CastFP = true) const;
+
+    RValue ConvertToValueOrAtomic(llvm::Value *IntVal, AggValueSlot ResultSlot,
+                                  SourceLocation Loc, bool AsValue,
+                                  bool CastFP = true) const;
 
     /// Copy an atomic r-value into atomic-layout memory.
     void emitCopyIntoMemory(RValue rvalue) const;
@@ -261,7 +263,8 @@ namespace {
     void EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
                                llvm::AtomicOrdering AO, bool IsVolatile);
     /// Emits atomic load as LLVM instruction.
-    llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile);
+    llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile,
+                                  bool CastFP = true);
     /// Emits atomic compare-and-exchange op as a libcall.
     llvm::Value *EmitAtomicCompareExchangeLibcall(
         llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
@@ -1396,12 +1399,13 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
       LVal.getBaseInfo(), TBAAAccessInfo()));
 }
 
-RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
-                                             AggValueSlot ResultSlot,
-                                             SourceLocation Loc,
-                                             bool AsValue) const {
+RValue AtomicInfo::ConvertToValueOrAtomic(llvm::Value *Val,
+                                          AggValueSlot ResultSlot,
+                                          SourceLocation Loc, bool AsValue,
+                                          bool CastFP) const {
   // Try not to in some easy cases.
-  assert(IntVal->getType()->isIntegerTy() && "Expected integer value");
+  assert((Val->getType()->isIntegerTy() || Val->getType()->isIEEELikeFPTy()) &&
+         "Expected integer or floating point value");
   if (getEvaluationKind() == TEK_Scalar &&
       (((!LVal.isBitField() ||
          LVal.getBitFieldInfo().Size == ValueSizeInBits) &&
@@ -1410,13 +1414,14 @@ RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
     auto *ValTy = AsValue
                       ? CGF.ConvertTypeForMem(ValueTy)
                       : getAtomicAddress().getElementType();
-    if (ValTy->isIntegerTy()) {
-      assert(IntVal->getType() == ValTy && "Different integer types.");
-      return RValue::get(CGF.EmitFromMemory(IntVal, ValueTy));
+    if (ValTy->isIntegerTy() || (!CastFP && ValTy->isIEEELikeFPTy())) {
+      assert((!ValTy->isIntegerTy() || Val->getType() == ValTy) &&
+             "Different integer types.");
+      return RValue::get(CGF.EmitFromMemory(Val, ValueTy));
     } else if (ValTy->isPointerTy())
-      return RValue::get(CGF.Builder.CreateIntToPtr(IntVal, ValTy));
-    else if (llvm::CastInst::isBitCastable(IntVal->getType(), ValTy))
-      return RValue::get(CGF.Builder.CreateBitCast(IntVal, ValTy));
+      return RValue::get(CGF.Builder.CreateIntToPtr(Val, ValTy));
+    else if (llvm::CastInst::isBitCastable(Val->getType(), ValTy))
+      return RValue::get(CGF.Builder.CreateBitCast(Val, ValTy));
   }
 
   // Create a temporary.  This needs to be big enough to hold the
@@ -1433,8 +1438,7 @@ RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
 
   // Slam the integer into the temporary.
   Address CastTemp = castToAtomicIntPointer(Temp);
-  CGF.Builder.CreateStore(IntVal, CastTemp)
-      ->setVolatile(TempIsVolatile);
+  CGF.Builder.CreateStore(Val, CastTemp)->setVolatile(TempIsVolatile);
 
   return convertAtomicTempToRValue(Temp, ResultSlot, Loc, AsValue);
 }
@@ -1453,9 +1457,11 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
 }
 
 llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
-                                          bool IsVolatile) {
+                                          bool IsVolatile, bool CastFP) {
   // Okay, we're doing this natively.
-  Address Addr = getAtomicAddressAsAtomicIntPointer();
+  Address Addr = getAtomicAddress();
+  if (!(Addr.getElementType()->isIEEELikeFPTy() && !CastFP))
+    Addr = castToAtomicIntPointer(Addr);
   llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, "atomic-load");
   Load->setAtomic(AO);
 
@@ -1515,7 +1521,7 @@ RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
   }
 
   // Okay, we're doing this natively.
-  auto *Load = EmitAtomicLoadOp(AO, IsVolatile);
+  auto *Load = EmitAtomicLoadOp(AO, IsVolatile, /*CastFP=*/false);
 
   // If we're ignoring an aggregate return, don't do anything.
   if (getEvaluationKind() == TEK_Aggregate && ResultSlot.isIgnored())
@@ -1523,7 +1529,8 @@ RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
 
   // Okay, turn that back into the original value or atomic (for non-simple
   // lvalues) type.
-  return ConvertIntToValueOrAtomic(Load, ResultSlot, Loc, AsValue);
+  return ConvertToValueOrAtomic(Load, ResultSlot, Loc, AsValue,
+                                /*CastFP=*/false);
 }
 
 /// Emit a load from an l-value of atomic type.  Note that the r-value
@@ -1586,12 +1593,18 @@ Address AtomicInfo::materializeRValue(RValue rvalue) const {
   return TempLV.getAddress(CGF);
 }
 
-llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal) const {
+llvm::Value *AtomicInfo::getScalarRValValueOrNull(RValue RVal) const {
+  if (RVal.isScalar() && (!hasPadding() || !LVal.isSimple()))
+    return RVal.getScalarVal();
+  return nullptr;
+}
+
+llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal, bool CastFP) const {
   // If we've got a scalar value of the right size, try to avoid going
-  // through memory.
-  if (RVal.isScalar() && (!hasPadding() || !LVal.isSimple())) {
-    llvm::Value *Value = RVal.getScalarVal();
-    if (isa<llvm::IntegerType>(Value->getType()))
+  // through memory. Floats get casted if needed by AtomicExpandPass.
+  if (llvm::Value *Value = getScalarRValValueOrNull(RVal)) {
+    if (isa<llvm::IntegerType>(Value->getType()) ||
+        (!CastFP && Value->getType()->isIEEELikeFPTy()))
       return CGF.EmitToMemory(Value, ValueTy);
     else {
       llvm::IntegerType *InputIntTy = llvm::IntegerType::get(
@@ -1677,8 +1690,8 @@ std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
   auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success,
                                          Failure, IsWeak);
   return std::make_pair(
-      ConvertIntToValueOrAtomic(Res.first, AggValueSlot::ignored(),
-                                SourceLocation(), /*AsValue=*/false),
+      ConvertToValueOrAtomic(Res.first, AggValueSlot::ignored(),
+                             SourceLocation(), /*AsValue=*/false),
       Res.second);
 }
 
@@ -1787,8 +1800,8 @@ void AtomicInfo::EmitAtomicUpdateOp(
       requiresMemSetZero(getAtomicAddress().getElementType())) {
     CGF.Builder.CreateStore(PHI, NewAtomicIntAddr);
   }
-  auto OldRVal = ConvertIntToValueOrAtomic(PHI, AggValueSlot::ignored(),
-                                           SourceLocation(), /*AsValue=*/false);
+  auto OldRVal = ConvertToValueOrAtomic(PHI, AggValueSlot::ignored(),
+                                        SourceLocation(), /*AsValue=*/false);
   EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, NewAtomicAddr);
   auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr);
   // Try to write new value using cmpxchg operation.
@@ -1953,13 +1966,22 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
     }
 
     // Okay, we're doing this natively.
-    llvm::Value *intValue = atomics.convertRValueToInt(rvalue);
+    llvm::Value *ValToStore =
+        atomics.convertRValueToInt(rvalue, /*CastFP=*/false);
 
     // Do the atomic store.
-    Address addr = atomics.castToAtomicIntPointer(atomics.getAtomicAddress());
-    intValue = Builder.CreateIntCast(
-        intValue, addr.getElementType(), /*isSigned=*/false);
-    llvm::StoreInst *store = Builder.CreateStore(intValue, addr);
+    Address Addr = atomics.getAtomicAddress();
+    bool ShouldCastToInt = true;
+    if (llvm::Value *Value = atomics.getScalarRValValueOrNull(rvalue))
+      if (isa<llvm::IntegerType>(Value->getType()) ||
+          Value->getType()->isIEEELikeFPTy())
+        ShouldCastToInt = false;
+    if (ShouldCastToInt) {
+      Addr = atomics.castToAtomicIntPointer(Addr);
+      ValToStore = Builder.CreateIntCast(ValToStore, Addr.getElementType(),
+                                         /*isSigned=*/false);
+    }
+    llvm::StoreInst *store = Builder.CreateStore(ValToStore, Addr);
 
     if (AO == llvm::AtomicOrdering::Acquire)
       AO = llvm::AtomicOrdering::Monotonic;
diff --git a/clang/test/CodeGen/SystemZ/atomic_fp_load_store.c b/clang/test/CodeGen/SystemZ/atomic_fp_load_store.c
new file mode 100644
index 000000000000..8a4383e92a1e
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/atomic_fp_load_store.c
@@ -0,0 +1,164 @@
+// RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - | FileCheck %s
+//
+// Test that floating point atomic stores and loads do not get casted to/from
+// integer.
+
+#include <stdatomic.h>
+
+_Atomic float Af;
+_Atomic double Ad;
+_Atomic long double Ald;
+
+//// Atomic stores of floating point values.
+void fun0(float Arg) {
+// CHECK-LABEL: @fun0
+// CHECK:       store atomic float %Arg, ptr @Af seq_cst, align 4
+  Af = Arg;
+}
+
+void fun1(double Arg) {
+// CHECK-LABEL: @fun1
+// CHECK:       store atomic double %Arg, ptr @Ad seq_cst, align 8
+  Ad = Arg;
+}
+
+void fun2(long double Arg) {
+// CHECK-LABEL: @fun2
+// CHECK:       store atomic fp128 %Arg, ptr @Ald seq_cst, align 16
+  Ald = Arg;
+}
+
+void fun3(_Atomic float *Dst, float Arg) {
+// CHECK-LABEL: @fun
+// CHECK:       store atomic float %Arg, ptr %Dst seq_cst, align 4
+  *Dst = Arg;
+}
+
+void fun4(_Atomic double *Dst, double Arg) {
+// CHECK-LABEL: @fun4
+// CHECK:       store atomic double %Arg, ptr %Dst seq_cst, align 8
+  *Dst = Arg;
+}
+
+void fun5(_Atomic long double *Dst, long double Arg) {
+// CHECK-LABEL: @fun5
+// CHECK:       store atomic fp128 %Arg, ptr %Dst seq_cst, align 16
+  *Dst = Arg;
+}
+
+//// Atomic loads of floating point values.
+float fun6() {
+// CHECK-LABEL: @fun6
+// CHECK:       %atomic-load = load atomic float, ptr @Af seq_cst, align 4
+  return Af;
+}
+
+float fun7() {
+// CHECK-LABEL: @fun7
+// CHECK:       %atomic-load = load atomic double, ptr @Ad seq_cst, align 8
+  return Ad;
+}
+
+float fun8() {
+// CHECK-LABEL: @fun8
+// CHECK:       %atomic-load = load atomic fp128, ptr @Ald seq_cst, align 16
+  return Ald;
+}
+
+float fun9(_Atomic float *Src) {
+// CHECK-LABEL: @fun9
+// CHECK:       %atomic-load = load atomic float, ptr %Src seq_cst, align 4
+  return *Src;
+}
+
+double fun10(_Atomic double *Src) {
+// CHECK-LABEL: @fun10
+// CHECK:       %atomic-load = load atomic double, ptr %Src seq_cst, align 8
+  return *Src;
+}
+
+long double fun11(_Atomic long double *Src) {
+// CHECK-LABEL: @fun11
+// CHECK:       %atomic-load = load atomic fp128, ptr %Src seq_cst, align 16
+  return *Src;
+}
+
+//// Same, but with 'volatile' as well:
+
+_Atomic volatile float Af_vol;
+_Atomic volatile double Ad_vol;
+_Atomic volatile long double Ald_vol;
+
+//// Atomic volatile stores of floating point values.
+void fun0_vol(float Arg) {
+// CHECK-LABEL: @fun0_vol
+// CHECK:       store atomic volatile float %Arg, ptr @Af_vol seq_cst, align 4
+  Af_vol = Arg;
+}
+
+void fun1_vol(double Arg) {
+// CHECK-LABEL: @fun1_vol
+// CHECK:       store atomic volatile double %Arg, ptr @Ad_vol seq_cst, align 8
+  Ad_vol = Arg;
+}
+
+void fun2_vol(long double Arg) {
+// CHECK-LABEL: @fun2_vol
+// CHECK:       store atomic volatile fp128 %Arg, ptr @Ald_vol seq_cst, align 16
+  Ald_vol = Arg;
+}
+
+void fun3_vol(_Atomic volatile float *Dst, float Arg) {
+// CHECK-LABEL: @fun3_vol
+// CHECK:       store atomic volatile float %Arg, ptr %Dst seq_cst, align 4
+  *Dst = Arg;
+}
+
+void fun4_vol(_Atomic volatile double *Dst, double Arg) {
+// CHECK-LABEL: @fun4_vol
+// CHECK:       store atomic volatile double %Arg, ptr %Dst seq_cst, align 8
+  *Dst = Arg;
+}
+
+void fun5_vol(_Atomic volatile long double *Dst, long double Arg) {
+// CHECK-LABEL: @fun5_vol
+// CHECK:       store atomic volatile fp128 %Arg, ptr %Dst seq_cst, align 16
+  *Dst = Arg;
+}
+
+//// Atomic volatile loads of floating point values.
+float fun6_vol() {
+// CHECK-LABEL: @fun6_vol
+// CHECK:       %atomic-load = load atomic volatile float, ptr @Af_vol seq_cst, align 4
+  return Af_vol;
+}
+
+float fun7_vol() {
+// CHECK-LABEL: @fun7_vol
+// CHECK:       %atomic-load = load atomic volatile double, ptr @Ad_vol seq_cst, align 8
+  return Ad_vol;
+}
+
+float fun8_vol() {
+// CHECK-LABEL: @fun8_vol
+// CHECK:       %atomic-load = load atomic volatile fp128, ptr @Ald_vol seq_cst, align 16
+  return Ald_vol;
+}
+
+float fun9_vol(_Atomic volatile float *Src) {
+// CHECK-LABEL: @fun9_vol
+// CHECK:       %atomic-load = load atomic volatile float, ptr %Src seq_cst, align 4
+  return *Src;
+}
+
+double fun10_vol(_Atomic volatile double *Src) {
+// CHECK-LABEL: @fun10_vol
+// CHECK:       %atomic-load = load atomic volatile double, ptr %Src seq_cst, align 8
+  return *Src;
+}
+
+long double fun11_vol(_Atomic volatile long double *Src) {
+// CHECK-LABEL: @fun11_vol
+// CHECK:       %atomic-load = load atomic volatile fp128, ptr %Src seq_cst, align 16
+  return *Src;
+}
diff --git a/clang/test/CodeGen/atomic.c b/clang/test/CodeGen/atomic.c
index 9143bedab906..af5c056bbfe6 100644
--- a/clang/test/CodeGen/atomic.c
+++ b/clang/test/CodeGen/atomic.c
@@ -145,6 +145,5 @@ void force_global_uses(void) {
   (void)glob_int;
   // CHECK: load atomic i32, ptr @[[GLOB_INT]] seq_cst
   (void)glob_flt;
-  // CHECK: %[[LOCAL_FLT:.+]] = load atomic i32, ptr @[[GLOB_FLT]] seq_cst
-  // CHECK-NEXT: bitcast i32 %[[LOCAL_FLT]] to float
+  // CHECK: load atomic float, ptr @[[GLOB_FLT]] seq_cst
 }
diff --git a/clang/test/CodeGen/c11atomics-ios.c b/clang/test/CodeGen/c11atomics-ios.c
index bcb6519ab0dc..811820b67fbd 100644
--- a/clang/test/CodeGen/c11atomics-ios.c
+++ b/clang/test/CodeGen/c11atomics-ios.c
@@ -19,15 +19,13 @@ void testFloat(_Atomic(float) *fp) {
   _Atomic(float) x = 2.0f;
 
 // CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]]
-// CHECK-NEXT: [[T2:%.*]] = load atomic i32, ptr [[T0]] seq_cst, align 4
-// CHECK-NEXT: [[T3:%.*]] = bitcast i32 [[T2]] to float
-// CHECK-NEXT: store float [[T3]], ptr [[F]]
+// CHECK-NEXT: [[T2:%.*]] = load atomic float, ptr [[T0]] seq_cst, align 4
+// CHECK-NEXT: store float [[T2]], ptr [[F]]
   float f = *fp;
 
 // CHECK-NEXT: [[T0:%.*]] = load float, ptr [[F]], align 4
 // CHECK-NEXT: [[T1:%.*]] = load ptr, ptr [[FP]], align 4
-// CHECK-NEXT: [[T2:%.*]] = bitcast float [[T0]] to i32
-// CHECK-NEXT: store atomic i32 [[T2]], ptr [[T1]] seq_cst, align 4
+// CHECK-NEXT: store atomic float [[T0]], ptr [[T1]] seq_cst, align 4
   *fp = f;
 
 // CHECK-NEXT: ret void
diff --git a/clang/test/OpenMP/atomic_read_codegen.c b/clang/test/OpenMP/atomic_read_codegen.c
index b60e1686d4da..0a68c8e2c35a 100644
--- a/clang/test/OpenMP/atomic_read_codegen.c
+++ b/clang/test/OpenMP/atomic_read_codegen.c
@@ -128,13 +128,11 @@ int main(void) {
 // CHECK: store i64
 #pragma omp atomic read
   ullv = ullx;
-// CHECK: load atomic i32, ptr {{.*}} monotonic, align 4
-// CHECK: bitcast i32 {{.*}} to float
+// CHECK: load atomic float, ptr {{.*}} monotonic, align 4
 // CHECK: store float
 #pragma omp atomic read
   fv = fx;
-// CHECK: load atomic i64, ptr {{.*}} monotonic, align 8
-// CHECK: bitcast i64 {{.*}} to double
+// CHECK: load atomic double, ptr {{.*}} monotonic, align 8
 // CHECK: store double
 #pragma omp atomic read
   dv = dx;
@@ -194,11 +192,11 @@ int main(void) {
 // CHECK: store i64
 #pragma omp atomic read
   lv = cix;
-// CHECK: load atomic i32, ptr {{.*}} monotonic, align 4
+// CHECK: load atomic float, ptr {{.*}} monotonic, align 4
 // CHECK: store i64
 #pragma omp atomic read
   ulv = fx;
-// CHECK: load atomic i64, ptr {{.*}} monotonic, align 8
+// CHECK: load atomic double, ptr {{.*}} monotonic, align 8
 // CHECK: store i64
 #pragma omp atomic read
   llv = dx;
diff --git a/clang/test/OpenMP/atomic_write_codegen.c b/clang/test/OpenMP/atomic_write_codegen.c
index 24dfbf9c0e8f..afe8737d30b0 100644
--- a/clang/test/OpenMP/atomic_write_codegen.c
+++ b/clang/test/OpenMP/atomic_write_codegen.c
@@ -131,13 +131,11 @@ int main(void) {
 #pragma omp atomic write
   ullx = ullv;
 // CHECK: load float, ptr
-// CHECK: bitcast float {{.*}} to i32
-// CHECK: store atomic i32 {{.*}}, ptr {{.*}} monotonic, align 4
+// CHECK: store atomic float {{.*}}, ptr {{.*}} monotonic, align 4
 #pragma omp atomic write
   fx = fv;
 // CHECK: load double, ptr
-// CHECK: bitcast double {{.*}} to i64
-// CHECK: store atomic i64 {{.*}}, ptr {{.*}} monotonic, align 8
+// CHECK: store atomic double {{.*}}, ptr {{.*}} monotonic, align 8
 #pragma omp atomic write
   dx = dv;
 // CHECK: [[LD:%.+]] = load x86_fp80, ptr
@@ -215,11 +213,11 @@ int main(void) {
 #pragma omp atomic write
   cix = lv;
 // CHECK: load i64, ptr
-// CHECK: store atomic i32 %{{.+}}, ptr {{.*}} monotonic, align 4
+// CHECK: store atomic float %{{.+}}, ptr {{.*}} monotonic, align 4
 #pragma omp atomic write
   fx = ulv;
 // CHECK: load i64, ptr
-// CHECK: store atomic i64 %{{.+}}, ptr {{.*}} monotonic, align 8
+// CHECK: store atomic double %{{.+}}, ptr {{.*}} monotonic, align 8
 #pragma omp atomic write
   dx = llv;
 // CHECK: load i64, ptr
@@ -491,8 +489,7 @@ int main(void) {
   float2x.x = ulv;
 // CHECK: call i32 @llvm.read_register.i32(
 // CHECK: sitofp i32 %{{.+}} to double
-// CHECK: bitcast double %{{.+}} to i64
-// CHECK: store atomic i64 %{{.+}}, ptr @{{.+}} seq_cst, align 8
+// CHECK: store atomic double %{{.+}}, ptr @{{.+}} seq_cst, align 8
 // CHECK: call{{.*}} @__kmpc_flush(
 #pragma omp atomic write seq_cst
   dv = rix;
-- 
cgit v1.2.3


From afd47587039e5a93919eb962cfe3a230cc91c504 Mon Sep 17 00:00:00 2001
From: Danial Klimkin <dklimkin@google.com>
Date: Tue, 12 Mar 2024 15:01:18 +0100
Subject: Revert "[NVPTX] Add support for atomic add for f16 type" (#84918)

Reverts llvm/llvm-project#84295 due to breakages.
---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp |   3 -
 llvm/lib/Target/NVPTX/NVPTXIntrinsics.td    |  15 ----
 llvm/test/CodeGen/NVPTX/atomics-sm70.ll     | 121 ----------------------------
 llvm/test/CodeGen/NVPTX/atomics.ll          |   7 --
 4 files changed, 146 deletions(-)
 delete mode 100644 llvm/test/CodeGen/NVPTX/atomics-sm70.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index c411c8ef9528..c979c03dc1b8 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -6100,9 +6100,6 @@ NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
 
   if (AI->isFloatingPointOperation()) {
     if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) {
-      if (Ty->isHalfTy() && STI.getSmVersion() >= 70 &&
-          STI.getPTXVersion() >= 63)
-        return AtomicExpansionKind::None;
       if (Ty->isFloatTy())
         return AtomicExpansionKind::None;
       if (Ty->isDoubleTy() && STI.hasAtomAddF64())
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 869b13369e87..477789a164ea 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1630,13 +1630,6 @@ defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
   ".add", atomic_load_add_64_gen, i64imm, imm>;
 
-defm INT_PTX_ATOM_ADD_G_F16 : F_ATOMIC_2<f16, Int16Regs, ".global", ".f16", ".add.noftz",
-  atomic_load_add_g, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
-defm INT_PTX_ATOM_ADD_S_F16 : F_ATOMIC_2<f16, Int16Regs, ".shared", ".f16", ".add.noftz",
-  atomic_load_add_s, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
-defm INT_PTX_ATOM_ADD_GEN_F16 : F_ATOMIC_2<f16, Int16Regs, "", ".f16", ".add.noftz",
-  atomic_load_add_gen, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
-
 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
   atomic_load_add_g, f32imm, fpimm>;
 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
@@ -2014,9 +2007,6 @@ multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
                        SDNode Imm, ValueType ImmTy,
                        list<Predicate> Preds> {
   let AddedComplexity = 1 in {
-    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
-                      (ins Int16Regs:$src, regclass:$b),
-                      (Intr (i16 Int16Regs:$src), (regT regclass:$b))>;
     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                       (ins Int32Regs:$src, regclass:$b),
                       (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
@@ -2027,9 +2017,6 @@ multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
   // tablegen can't infer argument types from Intrinsic (though it can
   // from Instruction) so we have to enforce specific type on
   // immediates via explicit cast to ImmTy.
-  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
-                    (ins Int16Regs:$src, ImmType:$b),
-                    (Intr (i16 Int16Regs:$src), (ImmTy Imm:$b))>;
   def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                     (ins Int32Regs:$src, ImmType:$b),
                     (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
@@ -2149,8 +2136,6 @@ multiclass ATOM2_add_impl<string OpStr> {
    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
-   defm _f16  : ATOM2S_impl<OpStr, "f", "f16", f16, Int16Regs, f16imm, fpimm, f16,
-                            [hasSM<70>, hasPTX<63>]>;
    defm _f32  : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
                             []>;
    defm _f64  : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
deleted file mode 100644
index f68bb2049d00..000000000000
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ /dev/null
@@ -1,121 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK64
-; RUN: llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx62 | FileCheck %s --check-prefixes=CHECKPTX62
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx62 | %ptxas-verify -arch=sm_70 %}
-
-target triple = "nvptx64-nvidia-cuda"
-
-define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %val) {
-; CHECK-LABEL: test(
-; CHECK:       {
-; CHECK-NEXT:    .reg .b16 %rs<5>;
-; CHECK-NEXT:    .reg .b32 %r<4>;
-; CHECK-EMPTY:
-; CHECK-NEXT:  // %bb.0:
-; CHECK-NEXT:    ld.param.u32 %r1, [test_param_0];
-; CHECK-NEXT:    ld.param.b16 %rs1, [test_param_3];
-; CHECK-NEXT:    atom.add.noftz.f16 %rs2, [%r1], %rs1;
-; CHECK-NEXT:    ld.param.u32 %r2, [test_param_1];
-; CHECK-NEXT:    atom.global.add.noftz.f16 %rs3, [%r2], %rs1;
-; CHECK-NEXT:    ld.param.u32 %r3, [test_param_2];
-; CHECK-NEXT:    atom.shared.add.noftz.f16 %rs4, [%r3], %rs1;
-; CHECK-NEXT:    ret;
-;
-; CHECK64-LABEL: test(
-; CHECK64:       {
-; CHECK64-NEXT:    .reg .b16 %rs<5>;
-; CHECK64-NEXT:    .reg .b64 %rd<4>;
-; CHECK64-EMPTY:
-; CHECK64-NEXT:  // %bb.0:
-; CHECK64-NEXT:    ld.param.u64 %rd1, [test_param_0];
-; CHECK64-NEXT:    ld.param.b16 %rs1, [test_param_3];
-; CHECK64-NEXT:    atom.add.noftz.f16 %rs2, [%rd1], %rs1;
-; CHECK64-NEXT:    ld.param.u64 %rd2, [test_param_1];
-; CHECK64-NEXT:    atom.global.add.noftz.f16 %rs3, [%rd2], %rs1;
-; CHECK64-NEXT:    ld.param.u64 %rd3, [test_param_2];
-; CHECK64-NEXT:    atom.shared.add.noftz.f16 %rs4, [%rd3], %rs1;
-; CHECK64-NEXT:    ret;
-;
-; CHECKPTX62-LABEL: test(
-; CHECKPTX62:       {
-; CHECKPTX62-NEXT:    .reg .pred %p<4>;
-; CHECKPTX62-NEXT:    .reg .b16 %rs<14>;
-; CHECKPTX62-NEXT:    .reg .b32 %r<49>;
-; CHECKPTX62-EMPTY:
-; CHECKPTX62-NEXT:  // %bb.0:
-; CHECKPTX62-NEXT:    ld.param.b16 %rs1, [test_param_3];
-; CHECKPTX62-NEXT:    ld.param.u32 %r20, [test_param_2];
-; CHECKPTX62-NEXT:    ld.param.u32 %r19, [test_param_1];
-; CHECKPTX62-NEXT:    ld.param.u32 %r21, [test_param_0];
-; CHECKPTX62-NEXT:    and.b32 %r1, %r21, -4;
-; CHECKPTX62-NEXT:    and.b32 %r22, %r21, 3;
-; CHECKPTX62-NEXT:    shl.b32 %r2, %r22, 3;
-; CHECKPTX62-NEXT:    mov.b32 %r23, 65535;
-; CHECKPTX62-NEXT:    shl.b32 %r24, %r23, %r2;
-; CHECKPTX62-NEXT:    not.b32 %r3, %r24;
-; CHECKPTX62-NEXT:    ld.u32 %r46, [%r1];
-; CHECKPTX62-NEXT:  $L__BB0_1: // %atomicrmw.start
-; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECKPTX62-NEXT:    shr.u32 %r25, %r46, %r2;
-; CHECKPTX62-NEXT:    cvt.u16.u32 %rs2, %r25;
-; CHECKPTX62-NEXT:    add.rn.f16 %rs4, %rs2, %rs1;
-; CHECKPTX62-NEXT:    cvt.u32.u16 %r26, %rs4;
-; CHECKPTX62-NEXT:    shl.b32 %r27, %r26, %r2;
-; CHECKPTX62-NEXT:    and.b32 %r28, %r46, %r3;
-; CHECKPTX62-NEXT:    or.b32 %r29, %r28, %r27;
-; CHECKPTX62-NEXT:    atom.cas.b32 %r6, [%r1], %r46, %r29;
-; CHECKPTX62-NEXT:    setp.ne.s32 %p1, %r6, %r46;
-; CHECKPTX62-NEXT:    mov.u32 %r46, %r6;
-; CHECKPTX62-NEXT:    @%p1 bra $L__BB0_1;
-; CHECKPTX62-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECKPTX62-NEXT:    and.b32 %r7, %r19, -4;
-; CHECKPTX62-NEXT:    shl.b32 %r30, %r19, 3;
-; CHECKPTX62-NEXT:    and.b32 %r8, %r30, 24;
-; CHECKPTX62-NEXT:    shl.b32 %r32, %r23, %r8;
-; CHECKPTX62-NEXT:    not.b32 %r9, %r32;
-; CHECKPTX62-NEXT:    ld.global.u32 %r47, [%r7];
-; CHECKPTX62-NEXT:  $L__BB0_3: // %atomicrmw.start9
-; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECKPTX62-NEXT:    shr.u32 %r33, %r47, %r8;
-; CHECKPTX62-NEXT:    cvt.u16.u32 %rs6, %r33;
-; CHECKPTX62-NEXT:    add.rn.f16 %rs8, %rs6, %rs1;
-; CHECKPTX62-NEXT:    cvt.u32.u16 %r34, %rs8;
-; CHECKPTX62-NEXT:    shl.b32 %r35, %r34, %r8;
-; CHECKPTX62-NEXT:    and.b32 %r36, %r47, %r9;
-; CHECKPTX62-NEXT:    or.b32 %r37, %r36, %r35;
-; CHECKPTX62-NEXT:    atom.global.cas.b32 %r12, [%r7], %r47, %r37;
-; CHECKPTX62-NEXT:    setp.ne.s32 %p2, %r12, %r47;
-; CHECKPTX62-NEXT:    mov.u32 %r47, %r12;
-; CHECKPTX62-NEXT:    @%p2 bra $L__BB0_3;
-; CHECKPTX62-NEXT:  // %bb.4: // %atomicrmw.end8
-; CHECKPTX62-NEXT:    and.b32 %r13, %r20, -4;
-; CHECKPTX62-NEXT:    shl.b32 %r38, %r20, 3;
-; CHECKPTX62-NEXT:    and.b32 %r14, %r38, 24;
-; CHECKPTX62-NEXT:    shl.b32 %r40, %r23, %r14;
-; CHECKPTX62-NEXT:    not.b32 %r15, %r40;
-; CHECKPTX62-NEXT:    ld.shared.u32 %r48, [%r13];
-; CHECKPTX62-NEXT:  $L__BB0_5: // %atomicrmw.start27
-; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECKPTX62-NEXT:    shr.u32 %r41, %r48, %r14;
-; CHECKPTX62-NEXT:    cvt.u16.u32 %rs10, %r41;
-; CHECKPTX62-NEXT:    add.rn.f16 %rs12, %rs10, %rs1;
-; CHECKPTX62-NEXT:    cvt.u32.u16 %r42, %rs12;
-; CHECKPTX62-NEXT:    shl.b32 %r43, %r42, %r14;
-; CHECKPTX62-NEXT:    and.b32 %r44, %r48, %r15;
-; CHECKPTX62-NEXT:    or.b32 %r45, %r44, %r43;
-; CHECKPTX62-NEXT:    atom.shared.cas.b32 %r18, [%r13], %r48, %r45;
-; CHECKPTX62-NEXT:    setp.ne.s32 %p3, %r18, %r48;
-; CHECKPTX62-NEXT:    mov.u32 %r48, %r18;
-; CHECKPTX62-NEXT:    @%p3 bra $L__BB0_5;
-; CHECKPTX62-NEXT:  // %bb.6: // %atomicrmw.end26
-; CHECKPTX62-NEXT:    ret;
-  %r1 = atomicrmw fadd ptr %dp0, half %val seq_cst
-  %r2 = atomicrmw fadd ptr addrspace(1) %dp1, half %val seq_cst
-  %ret = atomicrmw fadd ptr addrspace(3) %dp3, half %val seq_cst
-  ret void
-}
-
-attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll
index 6f2b5dcf47f1..e99d0fd05e34 100644
--- a/llvm/test/CodeGen/NVPTX/atomics.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics.ll
@@ -175,13 +175,6 @@ define float @atomicrmw_add_f32_generic(ptr %addr, float %val) {
   ret float %ret
 }
 
-; CHECK-LABEL: atomicrmw_add_f16_generic
-define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
-; CHECK: atom.cas
-  %ret = atomicrmw fadd ptr %addr, half %val seq_cst
-  ret half %ret
-}
-
 ; CHECK-LABEL: atomicrmw_add_f32_addrspace1
 define float @atomicrmw_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
 ; CHECK: atom.global.add.f32
-- 
cgit v1.2.3


From f8fab2126ffab713f4ab4619360b6941be6d4e35 Mon Sep 17 00:00:00 2001
From: Kupa-Martin <84517188+Kupa-Martin@users.noreply.github.com>
Date: Tue, 12 Mar 2024 11:21:34 -0300
Subject: [Clang][Sema] Fix type of enumerators in incomplete enumerations
 (#84068)

Enumerators dont have the type of their enumeration before the closing
brace. In these cases Expr::getEnumCoercedType() incorrectly returned
the enumeration type.

Introduced in PR #81418
Fixes #84712
---
 clang/lib/AST/Expr.cpp                             | 13 ++++++++-----
 clang/test/Sema/enum-constant-type.cpp             | 12 ++++++++++++
 clang/test/Sema/warn-compare-enum-types-mismatch.c | 11 ++++++++++-
 3 files changed, 30 insertions(+), 6 deletions(-)
 create mode 100644 clang/test/Sema/enum-constant-type.cpp

diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index b4de2155adce..f5ad402e3bd7 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -264,11 +264,14 @@ namespace {
 }
 
 QualType Expr::getEnumCoercedType(const ASTContext &Ctx) const {
-  if (isa<EnumType>(this->getType()))
-    return this->getType();
-  else if (const auto *ECD = this->getEnumConstantDecl())
-    return Ctx.getTypeDeclType(cast<EnumDecl>(ECD->getDeclContext()));
-  return this->getType();
+  if (isa<EnumType>(getType()))
+    return getType();
+  if (const auto *ECD = getEnumConstantDecl()) {
+    const auto *ED = cast<EnumDecl>(ECD->getDeclContext());
+    if (ED->isCompleteDefinition())
+      return Ctx.getTypeDeclType(ED);
+  }
+  return getType();
 }
 
 SourceLocation Expr::getExprLoc() const {
diff --git a/clang/test/Sema/enum-constant-type.cpp b/clang/test/Sema/enum-constant-type.cpp
new file mode 100644
index 000000000000..5db3a859a395
--- /dev/null
+++ b/clang/test/Sema/enum-constant-type.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -x c++ -fsyntax-only -verify %s -Wenum-compare
+// expected-no-diagnostics
+
+enum E1 {
+  E11 = 0
+};
+
+enum E2 {
+  E21 = 0,
+  E22 = E11,
+  E23 = E21 + E22
+};
diff --git a/clang/test/Sema/warn-compare-enum-types-mismatch.c b/clang/test/Sema/warn-compare-enum-types-mismatch.c
index 2b72aae16b97..47dd592488e6 100644
--- a/clang/test/Sema/warn-compare-enum-types-mismatch.c
+++ b/clang/test/Sema/warn-compare-enum-types-mismatch.c
@@ -1,12 +1,21 @@
 // RUN: %clang_cc1 -x c -fsyntax-only -verify -Wenum-compare -Wno-unused-comparison %s
 // RUN: %clang_cc1 -x c++ -fsyntax-only -verify -Wenum-compare -Wno-unused-comparison %s
 
+// In C enumerators (i.e enumeration constants) have type int (until C23). In
+// order to support diagnostics such as -Wenum-compare we pretend they have the
+// type of their enumeration.
+
 typedef enum EnumA {
   A
 } EnumA;
 
 enum EnumB {
-  B
+  B,
+  B1 = 1,
+  // In C++ this comparison doesnt warn as enumerators dont have the type of
+  // their enumeration before the closing brace. We mantain the same behavior
+  // in C.
+  B2 = A == B1
 };
 
 enum {
-- 
cgit v1.2.3


From a4aac22683a44264bb3883242b1c6b711f534e8b Mon Sep 17 00:00:00 2001
From: harishch4 <harishcse44@gmail.com>
Date: Tue, 12 Mar 2024 20:04:35 +0530
Subject: [Flang][OpenMp] Fix to threadprivate not working with
 host-association. (#74966)

This patch considers host-associated variables to generate threadprivate
Ops.

Fixes: #60763 #84561
---
 flang/lib/Lower/HostAssociations.cpp               | 14 ++++++-
 flang/lib/Lower/OpenMP/OpenMP.cpp                  | 18 ++++++---
 .../OpenMP/threadprivate-host-association-2.f90    | 44 ++++++++++++++++++++++
 .../OpenMP/threadprivate-host-association.f90      | 42 +++++++++++++++++++++
 4 files changed, 111 insertions(+), 7 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/threadprivate-host-association-2.f90
 create mode 100644 flang/test/Lower/OpenMP/threadprivate-host-association.f90

diff --git a/flang/lib/Lower/HostAssociations.cpp b/flang/lib/Lower/HostAssociations.cpp
index b9e13ccad1c9..414673b00f44 100644
--- a/flang/lib/Lower/HostAssociations.cpp
+++ b/flang/lib/Lower/HostAssociations.cpp
@@ -14,6 +14,7 @@
 #include "flang/Lower/CallInterface.h"
 #include "flang/Lower/ConvertType.h"
 #include "flang/Lower/ConvertVariable.h"
+#include "flang/Lower/OpenMP.h"
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Lower/SymbolMap.h"
 #include "flang/Optimizer/Builder/Character.h"
@@ -542,7 +543,10 @@ void Fortran::lower::HostAssociations::addSymbolsToBind(
          "must be initially empty");
   this->hostScope = &hostScope;
   for (const auto *s : symbols)
-    if (Fortran::lower::symbolIsGlobal(*s)) {
+    // GlobalOp are created for non-global threadprivate variable,
+    //  so considering them as globals.
+    if (Fortran::lower::symbolIsGlobal(*s) ||
+        (*s).test(Fortran::semantics::Symbol::Flag::OmpThreadprivate)) {
       // The ultimate symbol is stored here so that global symbols from the
       // host scope can later be searched in this set.
       globalSymbols.insert(&s->GetUltimate());
@@ -590,9 +594,15 @@ void Fortran::lower::HostAssociations::internalProcedureBindings(
     for (auto &hostVariable : pft::getScopeVariableList(*hostScope))
       if ((hostVariable.isAggregateStore() && hostVariable.isGlobal()) ||
           (hostVariable.hasSymbol() &&
-           globalSymbols.contains(&hostVariable.getSymbol().GetUltimate())))
+           globalSymbols.contains(&hostVariable.getSymbol().GetUltimate()))) {
         Fortran::lower::instantiateVariable(converter, hostVariable, symMap,
                                             storeMap);
+        // Generate threadprivate Op for host associated variables.
+        if (hostVariable.hasSymbol() &&
+            hostVariable.getSymbol().test(
+                Fortran::semantics::Symbol::Flag::OmpThreadprivate))
+          Fortran::lower::genThreadprivateOp(converter, hostVariable);
+      }
   }
   if (tupleSymbols.empty())
     return;
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 5cff95c7d125..4f0bb80cd7fd 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -170,9 +170,10 @@ static void threadPrivatizeVars(Fortran::lower::AbstractConverter &converter,
   };
 
   llvm::SetVector<const Fortran::semantics::Symbol *> threadprivateSyms;
-  converter.collectSymbolSet(
-      eval, threadprivateSyms,
-      Fortran::semantics::Symbol::Flag::OmpThreadprivate);
+  converter.collectSymbolSet(eval, threadprivateSyms,
+                             Fortran::semantics::Symbol::Flag::OmpThreadprivate,
+                             /*collectSymbols=*/true,
+                             /*collectHostAssociatedSymbols=*/true);
   std::set<Fortran::semantics::SourceName> threadprivateSymNames;
 
   // For a COMMON block, the ThreadprivateOp is generated for itself instead of
@@ -2276,8 +2277,15 @@ void Fortran::lower::genThreadprivateOp(
     // variable in main program, and it has implicit SAVE attribute. Take it as
     // with SAVE attribute, so to create GlobalOp for it to simplify the
     // translation to LLVM IR.
-    fir::GlobalOp global = globalInitialization(converter, firOpBuilder, sym,
-                                                var, currentLocation);
+    // Avoids performing multiple globalInitializations.
+    fir::GlobalOp global;
+    auto module = converter.getModuleOp();
+    std::string globalName = converter.mangleName(sym);
+    if (module.lookupSymbol<fir::GlobalOp>(globalName))
+      global = module.lookupSymbol<fir::GlobalOp>(globalName);
+    else
+      global = globalInitialization(converter, firOpBuilder, sym, var,
+                                    currentLocation);
 
     mlir::Value symValue = firOpBuilder.create<fir::AddrOfOp>(
         currentLocation, global.resultType(), global.getSymbol());
diff --git a/flang/test/Lower/OpenMP/threadprivate-host-association-2.f90 b/flang/test/Lower/OpenMP/threadprivate-host-association-2.f90
new file mode 100644
index 000000000000..b47bff5bebb0
--- /dev/null
+++ b/flang/test/Lower/OpenMP/threadprivate-host-association-2.f90
@@ -0,0 +1,44 @@
+! This test checks lowering of OpenMP Threadprivate Directive.
+! Test for threadprivate variable in host association.
+
+!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+!CHECK: func.func @_QQmain() attributes {fir.bindc_name = "main"} {
+!CHECK:   %[[A:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFEa"}
+!CHECK:   %[[A_DECL:.*]]:2 = hlfir.declare %[[A]] {uniq_name = "_QFEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:   %[[A_ADDR:.*]] = fir.address_of(@_QFEa) : !fir.ref<i32>
+!CHECK:   %[[TP_A:.*]] = omp.threadprivate %[[A_ADDR]] : !fir.ref<i32> -> !fir.ref<i32>
+!CHECK:   %[[TP_A_DECL:.*]]:2 = hlfir.declare %[[TP_A]] {uniq_name = "_QFEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:   fir.call @_QFPsub() fastmath<contract> : () -> ()
+!CHECK:   return
+!CHECK: }
+!CHECK: func.func private @_QFPsub() attributes {fir.internal_proc, llvm.linkage = #llvm.linkage<internal>} {
+!CHECK:   %[[A:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFEa"}
+!CHECK:   %[[A_DECL:.*]]:2 = hlfir.declare %[[A]] {uniq_name = "_QFEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:   %[[A_ADDR:.*]] = fir.address_of(@_QFEa) : !fir.ref<i32>
+!CHECK:   %[[TP_A:.*]] = omp.threadprivate %[[A_ADDR]] : !fir.ref<i32> -> !fir.ref<i32>
+!CHECK:   %[[TP_A_DECL:.*]]:2 = hlfir.declare %[[TP_A]] {uniq_name = "_QFEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:   omp.parallel {
+!CHECK:     %[[PAR_TP_A:.*]] = omp.threadprivate %[[A_ADDR]] : !fir.ref<i32> -> !fir.ref<i32>
+!CHECK:     %[[PAR_TP_A_DECL:.*]]:2 = hlfir.declare %[[PAR_TP_A]] {uniq_name = "_QFEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:     %{{.*}} = fir.load %[[PAR_TP_A_DECL]]#0 : !fir.ref<i32>
+!CHECK:     omp.terminator
+!CHECK:   }
+!CHECK:   return
+!CHECK: }
+!CHECK: fir.global internal @_QFEa : i32 {
+!CHECK:   %[[A:.*]] = fir.undefined i32
+!CHECK:   fir.has_value %[[A]] : i32
+!CHECK: }
+
+program main
+   integer :: a
+   !$omp threadprivate(a)
+   call sub()
+contains
+   subroutine sub()
+      !$omp parallel
+      print *, a
+      !$omp end parallel
+   end
+end
diff --git a/flang/test/Lower/OpenMP/threadprivate-host-association.f90 b/flang/test/Lower/OpenMP/threadprivate-host-association.f90
new file mode 100644
index 000000000000..98f7b51bb971
--- /dev/null
+++ b/flang/test/Lower/OpenMP/threadprivate-host-association.f90
@@ -0,0 +1,42 @@
+! This test checks lowering of OpenMP Threadprivate Directive.
+! Test for threadprivate variable in host association.
+
+!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+!CHECK: func.func @_QQmain() attributes {fir.bindc_name = "main"} {
+!CHECK:   %[[A:.*]] = fir.address_of(@_QFEa) : !fir.ref<i32>
+!CHECK:   %[[A_DECL:.*]]:2 = hlfir.declare %[[A]] {uniq_name = "_QFEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:   %[[TP_A:.*]] = omp.threadprivate %[[A_DECL]]#1 : !fir.ref<i32> -> !fir.ref<i32>
+!CHECK:   %[[TP_A_DECL:.*]]:2 = hlfir.declare %[[TP_A]] {uniq_name = "_QFEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:   fir.call @_QFPsub() fastmath<contract> : () -> ()
+!CHECK:   return
+!CHECK: }
+!CHECK: func.func private @_QFPsub() attributes {fir.internal_proc, llvm.linkage = #llvm.linkage<internal>} {
+!CHECK:   %[[A:.*]] = fir.address_of(@_QFEa) : !fir.ref<i32>
+!CHECK:   %[[A_DECL:.*]]:2 = hlfir.declare %[[A]] {uniq_name = "_QFEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:   %[[TP_A:.*]] = omp.threadprivate %[[A_DECL]]#1 : !fir.ref<i32> -> !fir.ref<i32>
+!CHECK:   %[[TP_A_DECL:.*]]:2 = hlfir.declare %[[TP_A]] {uniq_name = "_QFEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:   omp.parallel {
+!CHECK:     %[[PAR_TP_A:.*]] = omp.threadprivate %[[A_DECL]]#1 : !fir.ref<i32> -> !fir.ref<i32>
+!CHECK:     %[[PAR_TP_A_DECL:.*]]:2 = hlfir.declare %[[PAR_TP_A]] {uniq_name = "_QFEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:     %{{.*}} = fir.load %[[PAR_TP_A_DECL]]#0 : !fir.ref<i32>
+!CHECK:     omp.terminator
+!CHECK:   }
+!CHECK:   return
+!CHECK: }
+!CHECK: fir.global internal @_QFEa : i32 {
+!CHECK:   %[[A:.*]] = fir.zero_bits i32
+!CHECK:   fir.has_value %[[A]] : i32
+!CHECK: }
+
+program main
+   integer, save :: a
+   !$omp threadprivate(a)
+   call sub()
+contains
+   subroutine sub()
+      !$omp parallel
+      print *, a
+      !$omp end parallel
+   end
+end
-- 
cgit v1.2.3


From a3ad5faa321848376b277db369313c80d3df2152 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Tue, 12 Mar 2024 14:44:04 +0000
Subject: [LAA] Fix typo IndidrectUnsafe -> IndirectUnsafe.

Fix type in textual analysis output.
---
 llvm/lib/Analysis/LoopAccessAnalysis.cpp                              | 2 +-
 .../LoopAccessAnalysis/loops-with-indirect-reads-and-writes.ll        | 4 ++--
 .../Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index dd6b88fee415..c25eede96a18 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2260,7 +2260,7 @@ MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
 const char *MemoryDepChecker::Dependence::DepName[] = {
     "NoDep",
     "Unknown",
-    "IndidrectUnsafe",
+    "IndirectUnsafe",
     "Forward",
     "ForwardButPreventsForwarding",
     "Backward",
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/loops-with-indirect-reads-and-writes.ll b/llvm/test/Analysis/LoopAccessAnalysis/loops-with-indirect-reads-and-writes.ll
index e643efc4bfc5..fd4f417e57b6 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/loops-with-indirect-reads-and-writes.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/loops-with-indirect-reads-and-writes.ll
@@ -24,7 +24,7 @@ define void @test_indirect_read_write_loop_also_modifies_pointer_array(ptr nound
 ; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
 ; CHECK-NEXT:  Unsafe indirect dependence.
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        IndidrectUnsafe:
+; CHECK-NEXT:        IndirectUnsafe:
 ; CHECK-NEXT:            %l.2 = load i64, ptr %l.1, align 8, !tbaa !4 ->
 ; CHECK-NEXT:            store i64 %inc, ptr %l.1, align 8, !tbaa !4
 ; CHECK-EMPTY:
@@ -229,7 +229,7 @@ define void @test_indirect_read_write_loop_does_not_modify_pointer_array(ptr nou
 ; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
 ; CHECK-NEXT:  Unsafe indirect dependence.
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        IndidrectUnsafe:
+; CHECK-NEXT:        IndirectUnsafe:
 ; CHECK-NEXT:            %l.2 = load i64, ptr %l.1, align 8, !tbaa !4 ->
 ; CHECK-NEXT:            store i64 %inc, ptr %l.1, align 8, !tbaa !4
 ; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll b/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll
index 106dc8c13a49..402081fb939f 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll
@@ -10,7 +10,7 @@ define void @indirect_ptr_recurrences_read_write(ptr %A, ptr %B) {
 ; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
 ; CHECK-NEXT:  Unsafe indirect dependence.
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        IndidrectUnsafe:
+; CHECK-NEXT:        IndirectUnsafe:
 ; CHECK-NEXT:            %l = load i32, ptr %ptr.recur, align 4, !tbaa !4 ->
 ; CHECK-NEXT:            store i32 %xor, ptr %ptr.recur, align 4, !tbaa !4
 ; CHECK-EMPTY:
-- 
cgit v1.2.3


From 2cae13d60590c999c37828d709ff4ba58e5f261b Mon Sep 17 00:00:00 2001
From: bvlgah <octopus.busts_0w@icloud.com>
Date: Tue, 12 Mar 2024 22:46:18 +0800
Subject: [DominanceFrontierBase] Fix doc of compare()'s  return value.
 (#81352)

This is a trivial fix ( I guess it has not been noticed because of no
use). Currently, the doc says the function returns `true` if two
instances of `DominanceFrontierBase` matches, otherwise `false` is
returned. I have checked the implementation


https://github.com/llvm/llvm-project/blob/9308d6688c673606fee1625d777a52539ae72015/llvm/include/llvm/Analysis/DominanceFrontierImpl.h#L71-L94

which examines whether two dominance frontier mappings are equal, and
the actual value it returns contradicts the description in the doc.
---
 llvm/include/llvm/Analysis/DominanceFrontier.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Analysis/DominanceFrontier.h b/llvm/include/llvm/Analysis/DominanceFrontier.h
index b65cdc9cdb3c..772fdc9ddee0 100644
--- a/llvm/include/llvm/Analysis/DominanceFrontier.h
+++ b/llvm/include/llvm/Analysis/DominanceFrontier.h
@@ -101,8 +101,8 @@ public:
   /// return true;
   bool compareDomSet(DomSetType &DS1, const DomSetType &DS2) const;
 
-  /// compare - Return true if the other dominance frontier base matches
-  /// this dominance frontier base. Otherwise return false.
+  /// compare - Return false if the other dominance frontier base matches
+  /// this dominance frontier base. Otherwise return true.
   bool compare(DominanceFrontierBase &Other) const;
 
   /// print - Convert to human readable form
-- 
cgit v1.2.3


From 0aefd702f6c5346f216d29c704c4d0e4ec7397ac Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Tue, 12 Mar 2024 10:48:50 -0400
Subject: [gn] port bde7a6b791872b

---
 llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn
index 4107bbc12be2..c2999a67f58a 100644
--- a/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/unittests/Interpreter/BUILD.gn
@@ -9,6 +9,7 @@ unittest("ClangReplInterpreterTests") {
     "//clang/lib/Interpreter",
     "//llvm/lib/IR",
     "//llvm/lib/TargetParser",
+    "//llvm/lib/Testing/Support",
   ]
   sources = [
     "CodeCompletionTest.cpp",
-- 
cgit v1.2.3


From 15f3f446c504d1bb85282fb3bd98db6eab69829d Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer@sony.com>
Date: Tue, 12 Mar 2024 14:53:13 +0000
Subject: [RemoveDIs][NFC] Rename common interface functions for
 DPValues->DbgRecords (#84793)

As part of the effort to rename the DbgRecord classes, this patch
renames the widely-used functions that operate on DbgRecords but refer
to DbgValues or DPValues in their names to refer to DbgRecords instead;
all such functions are defined in one of `BasicBlock.h`,
`Instruction.h`, and `DebugProgramInstruction.h`.

This patch explicitly does not change the names of any comments or
variables, except for where they use the exact name of one of the
renamed functions. The reason for this is reviewability; this patch can
be trivially examined to determine that the only changes are direct
string substitutions and any results from clang-format responding to the
changed line lengths. Future patches will cover renaming variables and
comments, and then renaming the classes themselves.
---
 llvm/docs/RemoveDIsDebugInfo.md                    |   4 +-
 llvm/include/llvm/IR/BasicBlock.h                  |  18 +--
 llvm/include/llvm/IR/DebugProgramInstruction.h     |  22 ++--
 llvm/include/llvm/IR/Instruction.h                 |  18 +--
 llvm/lib/AsmParser/LLParser.cpp                    |   2 +-
 llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp    |  26 ++--
 llvm/lib/CodeGen/CodeGenPrepare.cpp                |  10 +-
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp       |   2 +-
 llvm/lib/CodeGen/SelectOptimize.cpp                |   6 +-
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp         |   4 +-
 .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp   |   2 +-
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp |   2 +-
 llvm/lib/IR/AsmWriter.cpp                          |   4 +-
 llvm/lib/IR/BasicBlock.cpp                         |  79 ++++++------
 llvm/lib/IR/DIBuilder.cpp                          |   6 +-
 llvm/lib/IR/DebugInfo.cpp                          |  10 +-
 llvm/lib/IR/DebugProgramInstruction.cpp            |  24 ++--
 llvm/lib/IR/Instruction.cpp                        |  22 ++--
 llvm/lib/IR/LLVMContextImpl.h                      |   8 +-
 llvm/lib/IR/Verifier.cpp                           |   6 +-
 llvm/lib/Transforms/Coroutines/CoroFrame.cpp       |   8 +-
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp       |   2 +-
 llvm/lib/Transforms/IPO/IROutliner.cpp             |   2 +-
 llvm/lib/Transforms/IPO/MergeFunctions.cpp         |   4 +-
 .../InstCombine/InstructionCombining.cpp           |   6 +-
 llvm/lib/Transforms/Scalar/ADCE.cpp                |   4 +-
 llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp   |   2 +-
 llvm/lib/Transforms/Scalar/JumpThreading.cpp       |   6 +-
 llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp  |   2 +-
 llvm/lib/Transforms/Scalar/SROA.cpp                |   4 +-
 llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp  |   2 +-
 .../lib/Transforms/Scalar/SpeculativeExecution.cpp |   6 +-
 llvm/lib/Transforms/Utils/BasicBlockUtils.cpp      |   6 +-
 llvm/lib/Transforms/Utils/CloneFunction.cpp        |   6 +-
 llvm/lib/Transforms/Utils/CodeExtractor.cpp        |   6 +-
 llvm/lib/Transforms/Utils/InlineFunction.cpp       |   6 +-
 llvm/lib/Transforms/Utils/Local.cpp                |  24 ++--
 llvm/lib/Transforms/Utils/LoopRotationUtils.cpp    |  10 +-
 llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp    |   2 +-
 llvm/lib/Transforms/Utils/LoopUtils.cpp            |   4 +-
 llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp |   2 +-
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp          |  20 +--
 llvm/lib/Transforms/Utils/ValueMapper.cpp          |   2 +-
 llvm/tools/llvm-reduce/deltas/ReduceDbgRecords.cpp |   2 +-
 llvm/unittests/IR/BasicBlockDbgInfoTest.cpp        | 136 ++++++++++-----------
 llvm/unittests/IR/DebugInfoTest.cpp                |  26 ++--
 llvm/unittests/IR/IRBuilderTest.cpp                |  12 +-
 llvm/unittests/IR/ValueTest.cpp                    |   8 +-
 llvm/unittests/Transforms/Utils/DebugifyTest.cpp   |   2 +-
 llvm/unittests/Transforms/Utils/LocalTest.cpp      |   2 +-
 50 files changed, 298 insertions(+), 301 deletions(-)

diff --git a/llvm/docs/RemoveDIsDebugInfo.md b/llvm/docs/RemoveDIsDebugInfo.md
index a0577678e20f..a9857733c70d 100644
--- a/llvm/docs/RemoveDIsDebugInfo.md
+++ b/llvm/docs/RemoveDIsDebugInfo.md
@@ -82,11 +82,11 @@ Utilities such as `findDbgUsers` and the like now have an optional argument that
 
 ## Examining debug info records at positions
 
-Call `Instruction::getDbgValueRange()` to get the range of `DPValue` objects that are attached to an instruction.
+Call `Instruction::getDbgRecordRange()` to get the range of `DPValue` objects that are attached to an instruction.
 
 ## Moving around, deleting
 
-You can use `DPValue::removeFromParent` to unlink a `DPValue` from it's marker, and then `BasicBlock::insertDPValueBefore` or `BasicBlock::insertDPValueAfter` to re-insert the `DPValue` somewhere else. You cannot insert a `DPValue` at an arbitary point in a list of `DPValue`s (if you're doing this with `dbg.value`s then it's unlikely to be correct).
+You can use `DPValue::removeFromParent` to unlink a `DPValue` from it's marker, and then `BasicBlock::insertDbgRecordBefore` or `BasicBlock::insertDbgRecordAfter` to re-insert the `DPValue` somewhere else. You cannot insert a `DPValue` at an arbitary point in a list of `DPValue`s (if you're doing this with `dbg.value`s then it's unlikely to be correct).
 
 Erase `DPValue`s by calling `eraseFromParent` or `deleteInstr` if it's already been removed.
 
diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h
index 179305e9260f..5bac113c9b7b 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -97,16 +97,16 @@ public:
   /// instruction of this block. These are equivalent to dbg.value intrinsics
   /// that exist at the end of a basic block with no terminator (a transient
   /// state that occurs regularly).
-  void setTrailingDPValues(DPMarker *M);
+  void setTrailingDbgRecords(DPMarker *M);
 
   /// Fetch the collection of DPValues that "trail" after the last instruction
-  /// of this block, see \ref setTrailingDPValues. If there are none, returns
+  /// of this block, see \ref setTrailingDbgRecords. If there are none, returns
   /// nullptr.
-  DPMarker *getTrailingDPValues();
+  DPMarker *getTrailingDbgRecords();
 
   /// Delete any trailing DPValues at the end of this block, see
-  /// \ref setTrailingDPValues.
-  void deleteTrailingDPValues();
+  /// \ref setTrailingDbgRecords.
+  void deleteTrailingDbgRecords();
 
   void dumpDbgValues() const;
 
@@ -121,10 +121,10 @@ public:
   DPMarker *getNextMarker(Instruction *I);
 
   /// Insert a DPValue into a block at the position given by \p I.
-  void insertDPValueAfter(DbgRecord *DPV, Instruction *I);
+  void insertDbgRecordAfter(DbgRecord *DPV, Instruction *I);
 
   /// Insert a DPValue into a block at the position given by \p Here.
-  void insertDPValueBefore(DbgRecord *DPV, InstListType::iterator Here);
+  void insertDbgRecordBefore(DbgRecord *DPV, InstListType::iterator Here);
 
   /// Eject any debug-info trailing at the end of a block. DPValues can
   /// transiently be located "off the end" of a block if the blocks terminator
@@ -137,8 +137,8 @@ public:
   /// happens in RemoveDIs debug-info mode, some special patching-up needs to
   /// occur: inserting into the middle of a sequence of dbg.value intrinsics
   /// does not have an equivalent with DPValues.
-  void reinsertInstInDPValues(Instruction *I,
-                              std::optional<DbgRecord::self_iterator> Pos);
+  void reinsertInstInDbgRecords(Instruction *I,
+                                std::optional<DbgRecord::self_iterator> Pos);
 
 private:
   void setParent(Function *parent);
diff --git a/llvm/include/llvm/IR/DebugProgramInstruction.h b/llvm/include/llvm/IR/DebugProgramInstruction.h
index a8faf415a3ea..507b652feeb0 100644
--- a/llvm/include/llvm/IR/DebugProgramInstruction.h
+++ b/llvm/include/llvm/IR/DebugProgramInstruction.h
@@ -577,9 +577,9 @@ public:
   void print(raw_ostream &ROS, ModuleSlotTracker &MST, bool IsForDebug) const;
 
   /// Produce a range over all the DPValues in this Marker.
-  iterator_range<simple_ilist<DbgRecord>::iterator> getDbgValueRange();
+  iterator_range<simple_ilist<DbgRecord>::iterator> getDbgRecordRange();
   iterator_range<simple_ilist<DbgRecord>::const_iterator>
-  getDbgValueRange() const;
+  getDbgRecordRange() const;
   /// Transfer any DPValues from \p Src into this DPMarker. If \p InsertAtHead
   /// is true, place them before existing DPValues, otherwise afterwards.
   void absorbDebugValues(DPMarker &Src, bool InsertAtHead);
@@ -590,11 +590,11 @@ public:
                          DPMarker &Src, bool InsertAtHead);
   /// Insert a DPValue into this DPMarker, at the end of the list. If
   /// \p InsertAtHead is true, at the start.
-  void insertDPValue(DbgRecord *New, bool InsertAtHead);
+  void insertDbgRecord(DbgRecord *New, bool InsertAtHead);
   /// Insert a DPValue prior to a DPValue contained within this marker.
-  void insertDPValue(DbgRecord *New, DbgRecord *InsertBefore);
+  void insertDbgRecord(DbgRecord *New, DbgRecord *InsertBefore);
   /// Insert a DPValue after a DPValue contained within this marker.
-  void insertDPValueAfter(DbgRecord *New, DbgRecord *InsertAfter);
+  void insertDbgRecordAfter(DbgRecord *New, DbgRecord *InsertAfter);
   /// Clone all DPMarkers from \p From into this marker. There are numerous
   /// options to customise the source/destination, due to gnarliness, see class
   /// comment.
@@ -606,11 +606,11 @@ public:
                      std::optional<simple_ilist<DbgRecord>::iterator> FromHere,
                      bool InsertAtHead = false);
   /// Erase all DPValues in this DPMarker.
-  void dropDbgValues();
+  void dropDbgRecords();
   /// Erase a single DbgRecord from this marker. In an ideal future, we would
   /// never erase an assignment in this way, but it's the equivalent to
   /// erasing a debug intrinsic from a block.
-  void dropOneDbgValue(DbgRecord *DR);
+  void dropOneDbgRecord(DbgRecord *DR);
 
   /// We generally act like all llvm Instructions have a range of DPValues
   /// attached to them, but in reality sometimes we don't allocate the DPMarker
@@ -621,7 +621,7 @@ public:
   /// that.
   static DPMarker EmptyDPMarker;
   static iterator_range<simple_ilist<DbgRecord>::iterator>
-  getEmptyDPValueRange() {
+  getEmptyDbgRecordRange() {
     return make_range(EmptyDPMarker.StoredDPValues.end(),
                       EmptyDPMarker.StoredDPValues.end());
   }
@@ -637,10 +637,10 @@ inline raw_ostream &operator<<(raw_ostream &OS, const DPMarker &Marker) {
 /// of DPMarker. Thus: it's pre-declared by users like Instruction, then an
 /// inlineable body defined here.
 inline iterator_range<simple_ilist<DbgRecord>::iterator>
-getDbgValueRange(DPMarker *DbgMarker) {
+getDbgRecordRange(DPMarker *DbgMarker) {
   if (!DbgMarker)
-    return DPMarker::getEmptyDPValueRange();
-  return DbgMarker->getDbgValueRange();
+    return DPMarker::getEmptyDbgRecordRange();
+  return DbgMarker->getDbgRecordRange();
 }
 
 } // namespace llvm
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 75f399ec2fcd..817abd6afbca 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -41,7 +41,7 @@ template <> struct ilist_alloc_traits<Instruction> {
   static inline void deleteNode(Instruction *V);
 };
 
-iterator_range<simple_ilist<DbgRecord>::iterator> getDbgValueRange(DPMarker *);
+iterator_range<simple_ilist<DbgRecord>::iterator> getDbgRecordRange(DPMarker *);
 
 class Instruction : public User,
                     public ilist_node_with_parent<Instruction, BasicBlock,
@@ -79,29 +79,29 @@ public:
       bool InsertAtHead = false);
 
   /// Return a range over the DPValues attached to this instruction.
-  iterator_range<simple_ilist<DbgRecord>::iterator> getDbgValueRange() const {
-    return llvm::getDbgValueRange(DbgMarker);
+  iterator_range<simple_ilist<DbgRecord>::iterator> getDbgRecordRange() const {
+    return llvm::getDbgRecordRange(DbgMarker);
   }
 
   /// Return an iterator to the position of the "Next" DPValue after this
   /// instruction, or std::nullopt. This is the position to pass to
-  /// BasicBlock::reinsertInstInDPValues when re-inserting an instruction.
+  /// BasicBlock::reinsertInstInDbgRecords when re-inserting an instruction.
   std::optional<simple_ilist<DbgRecord>::iterator> getDbgReinsertionPosition();
 
   /// Returns true if any DPValues are attached to this instruction.
-  bool hasDbgValues() const;
+  bool hasDbgRecords() const;
 
   /// Transfer any DPValues on the position \p It onto this instruction,
   /// by simply adopting the sequence of DPValues (which is efficient) if
   /// possible, by merging two sequences otherwise.
-  void adoptDbgValues(BasicBlock *BB, InstListType::iterator It,
-                      bool InsertAtHead);
+  void adoptDbgRecords(BasicBlock *BB, InstListType::iterator It,
+                       bool InsertAtHead);
 
   /// Erase any DPValues attached to this instruction.
-  void dropDbgValues();
+  void dropDbgRecords();
 
   /// Erase a single DPValue \p I that is attached to this instruction.
-  void dropOneDbgValue(DbgRecord *I);
+  void dropOneDbgRecord(DbgRecord *I);
 
   /// Handle the debug-info implications of this instruction being removed. Any
   /// attached DPValues need to "fall" down onto the next instruction.
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 78bcd94e23fa..2e0f5ba82220 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -6527,7 +6527,7 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) {
 
     // Attach any preceding debug values to this instruction.
     for (DbgRecordPtr &DR : TrailingDbgRecord)
-      BB->insertDPValueBefore(DR.release(), Inst->getIterator());
+      BB->insertDbgRecordBefore(DR.release(), Inst->getIterator());
     TrailingDbgRecord.clear();
   } while (!Inst->isTerminator());
 
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index 3b84624c3d4d..a4b819a735c6 100644
--- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -225,7 +225,7 @@ void FunctionVarLocs::init(FunctionVarLocsBuilder &Builder) {
     // Any VarLocInfos attached to a DPValue should now be remapped to their
     // marker Instruction, in order of DPValue appearance and prior to any
     // VarLocInfos attached directly to that instruction.
-    for (const DPValue &DPV : DPValue::filter(I->getDbgValueRange())) {
+    for (const DPValue &DPV : DPValue::filter(I->getDbgRecordRange())) {
       // Even though DPV defines a variable location, VarLocsBeforeInst can
       // still be empty if that VarLoc was redundant.
       if (!Builder.VarLocsBeforeInst.count(&DPV))
@@ -829,7 +829,7 @@ class MemLocFragmentFill {
   void process(BasicBlock &BB, VarFragMap &LiveSet) {
     BBInsertBeforeMap[&BB].clear();
     for (auto &I : BB) {
-      for (DPValue &DPV : DPValue::filter(I.getDbgValueRange())) {
+      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
         if (const auto *Locs = FnVarLocs->getWedge(&DPV)) {
           for (const VarLocInfo &Loc : *Locs) {
             addDef(Loc, &DPV, *I.getParent(), LiveSet);
@@ -1494,15 +1494,15 @@ const char *locStr(AssignmentTrackingLowering::LocKind Loc) {
 
 VarLocInsertPt getNextNode(const DbgRecord *DPV) {
   auto NextIt = ++(DPV->getIterator());
-  if (NextIt == DPV->getMarker()->getDbgValueRange().end())
+  if (NextIt == DPV->getMarker()->getDbgRecordRange().end())
     return DPV->getMarker()->MarkedInstr;
   return &*NextIt;
 }
 VarLocInsertPt getNextNode(const Instruction *Inst) {
   const Instruction *Next = Inst->getNextNode();
-  if (!Next->hasDbgValues())
+  if (!Next->hasDbgRecords())
     return Next;
-  return &*Next->getDbgValueRange().begin();
+  return &*Next->getDbgRecordRange().begin();
 }
 VarLocInsertPt getNextNode(VarLocInsertPt InsertPt) {
   if (isa<const Instruction *>(InsertPt))
@@ -1888,7 +1888,7 @@ void AssignmentTrackingLowering::resetInsertionPoint(DPValue &After) {
 void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
   // If the block starts with DPValues, we need to process those DPValues as
   // their own frame without processing any instructions first.
-  bool ProcessedLeadingDPValues = !BB.begin()->hasDbgValues();
+  bool ProcessedLeadingDPValues = !BB.begin()->hasDbgRecords();
   for (auto II = BB.begin(), EI = BB.end(); II != EI;) {
     assert(VarsTouchedThisFrame.empty());
     // Process the instructions in "frames". A "frame" includes a single
@@ -1914,11 +1914,11 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
     // II is now either a debug intrinsic, a non-debug instruction with no
     // attached DPValues, or a non-debug instruction with attached unprocessed
     // DPValues.
-    if (II != EI && II->hasDbgValues()) {
+    if (II != EI && II->hasDbgRecords()) {
       // Skip over non-variable debug records (i.e., labels). They're going to
       // be read from IR (possibly re-ordering them within the debug record
       // range) rather than from the analysis results.
-      for (DPValue &DPV : DPValue::filter(II->getDbgValueRange())) {
+      for (DPValue &DPV : DPValue::filter(II->getDbgRecordRange())) {
         resetInsertionPoint(DPV);
         processDPValue(DPV, LiveSet);
         assert(LiveSet->isValid());
@@ -2175,7 +2175,7 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
   };
   for (auto &BB : Fn) {
     for (auto &I : BB) {
-      for (DPValue &DPV : DPValue::filter(I.getDbgValueRange()))
+      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
         ProcessDbgRecord(&DPV, DPDeclares);
       if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) {
         ProcessDbgRecord(DII, InstDeclares);
@@ -2465,7 +2465,7 @@ bool AssignmentTrackingLowering::emitPromotedVarLocs(
   for (auto &BB : Fn) {
     for (auto &I : BB) {
       // Skip instructions other than dbg.values and dbg.assigns.
-      for (DPValue &DPV : DPValue::filter(I.getDbgValueRange()))
+      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
         if (DPV.isDbgValue() || DPV.isDbgAssign())
           TranslateDbgRecord(&DPV);
       auto *DVI = dyn_cast<DbgValueInst>(&I);
@@ -2567,7 +2567,7 @@ removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB,
       }
     };
     HandleLocsForWedge(&I);
-    for (DPValue &DPV : reverse(DPValue::filter(I.getDbgValueRange())))
+    for (DPValue &DPV : reverse(DPValue::filter(I.getDbgRecordRange())))
       HandleLocsForWedge(&DPV);
   }
 
@@ -2632,7 +2632,7 @@ removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB,
       }
     };
 
-    for (DPValue &DPV : DPValue::filter(I.getDbgValueRange()))
+    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
       HandleLocsForWedge(&DPV);
     HandleLocsForWedge(&I);
   }
@@ -2718,7 +2718,7 @@ removeUndefDbgLocsFromEntryBlock(const BasicBlock *BB,
         Changed = true;
       }
     };
-    for (DPValue &DPV : DPValue::filter(I.getDbgValueRange()))
+    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
       HandleLocsForWedge(&DPV);
     HandleLocsForWedge(&I);
   }
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 36f6cc83be2c..59a0c64d3c9f 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2983,7 +2983,7 @@ class TypePromotionTransaction {
           Inst->insertBefore(*Point.BB, Position);
       }
 
-      Inst->getParent()->reinsertInstInDPValues(Inst, BeforeDPValue);
+      Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDPValue);
     }
   };
 
@@ -8506,7 +8506,7 @@ bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
 
 bool CodeGenPrepare::fixupDPValuesOnInst(Instruction &I) {
   bool AnyChange = false;
-  for (DPValue &DPV : DPValue::filter(I.getDbgValueRange()))
+  for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
     AnyChange |= fixupDPValue(DPV);
   return AnyChange;
 }
@@ -8550,9 +8550,9 @@ static void DbgInserterHelper(DPValue *DPV, Instruction *VI) {
   DPV->removeFromParent();
   BasicBlock *VIBB = VI->getParent();
   if (isa<PHINode>(VI))
-    VIBB->insertDPValueBefore(DPV, VIBB->getFirstInsertionPt());
+    VIBB->insertDbgRecordBefore(DPV, VIBB->getFirstInsertionPt());
   else
-    VIBB->insertDPValueAfter(DPV, VI);
+    VIBB->insertDbgRecordAfter(DPV, VI);
 }
 
 // A llvm.dbg.value may be using a value before its definition, due to
@@ -8620,7 +8620,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
       // If this isn't a dbg.value, process any attached DPValue records
       // attached to this instruction.
       for (DPValue &DPV : llvm::make_early_inc_range(
-               DPValue::filter(Insn.getDbgValueRange()))) {
+               DPValue::filter(Insn.getDbgRecordRange()))) {
         if (DPV.Type != DPValue::LocationType::Value)
           continue;
         DbgProcessor(&DPV, &Insn);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 365870f540da..94fdb37e283b 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3277,7 +3277,7 @@ void IRTranslator::translateDbgDeclareRecord(Value *Address, bool HasArgList,
 
 void IRTranslator::translateDbgInfo(const Instruction &Inst,
                                       MachineIRBuilder &MIRBuilder) {
-  for (DbgRecord &DR : Inst.getDbgValueRange()) {
+  for (DbgRecord &DR : Inst.getDbgRecordRange()) {
     if (DPLabel *DPL = dyn_cast<DPLabel>(&DR)) {
       MIRBuilder.setDebugLoc(DPL->getDebugLoc());
       assert(DPL->getLabel() && "Missing label");
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
index 5609f481b22a..40898d284a09 100644
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -648,10 +648,10 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
     // Duplicate implementation for DPValues, the non-instruction debug-info
     // record. Helper lambda for moving DPValues to the end block.
     auto TransferDPValues = [&](Instruction &I) {
-      for (auto &DPValue : llvm::make_early_inc_range(I.getDbgValueRange())) {
+      for (auto &DPValue : llvm::make_early_inc_range(I.getDbgRecordRange())) {
         DPValue.removeFromParent();
-        EndBlock->insertDPValueBefore(&DPValue,
-                                      EndBlock->getFirstInsertionPt());
+        EndBlock->insertDbgRecordBefore(&DPValue,
+                                        EndBlock->getFirstInsertionPt());
       }
     };
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 246762dd7ab6..cce91dbd9531 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1181,14 +1181,14 @@ bool FastISel::selectCall(const User *I) {
 }
 
 void FastISel::handleDbgInfo(const Instruction *II) {
-  if (!II->hasDbgValues())
+  if (!II->hasDbgRecords())
     return;
 
   // Clear any metadata.
   MIMD = MIMetadata();
 
   // Reverse order of debug records, because fast-isel walks through backwards.
-  for (DbgRecord &DR : llvm::reverse(II->getDbgValueRange())) {
+  for (DbgRecord &DR : llvm::reverse(II->getDbgRecordRange())) {
     flushLocalValueMap();
     recomputeInsertPt();
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 22e57d0d99e9..b6a35f7ad4c4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1255,7 +1255,7 @@ void SelectionDAGBuilder::visitDbgInfo(const Instruction &I) {
   bool SkipDPValues = DAG.getFunctionVarLocs();
   // Is there is any debug-info attached to this instruction, in the form of
   // DbgRecord non-instruction debug-info records.
-  for (DbgRecord &DR : I.getDbgValueRange()) {
+  for (DbgRecord &DR : I.getDbgRecordRange()) {
     if (DPLabel *DPL = dyn_cast<DPLabel>(&DR)) {
       assert(DPL->getLabel() && "Missing label");
       SDDbgLabel *SDV =
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 1c14e4da8e9d..c78c3ed294e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1461,7 +1461,7 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
     if (DI && processDbgDeclare(FuncInfo, DI->getAddress(), DI->getExpression(),
                                 DI->getVariable(), DI->getDebugLoc()))
       FuncInfo.PreprocessedDbgDeclares.insert(DI);
-    for (const DPValue &DPV : DPValue::filter(I.getDbgValueRange())) {
+    for (const DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
       if (DPV.Type == DPValue::LocationType::Declare &&
           processDbgDeclare(FuncInfo, DPV.getVariableLocationOp(0),
                             DPV.getExpression(), DPV.getVariable(),
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index f2562c926e3b..1beb4c069a69 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -1131,7 +1131,7 @@ void SlotTracker::processFunctionMetadata(const Function &F) {
   processGlobalObjectMetadata(F);
   for (auto &BB : F) {
     for (auto &I : BB) {
-      for (const DbgRecord &DR : I.getDbgValueRange())
+      for (const DbgRecord &DR : I.getDbgRecordRange())
         processDbgRecordMetadata(DR);
       processInstructionMetadata(I);
     }
@@ -4097,7 +4097,7 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
 
   // Output all of the instructions in the basic block...
   for (const Instruction &I : *BB) {
-    for (const DbgRecord &DR : I.getDbgValueRange())
+    for (const DbgRecord &DR : I.getDbgRecordRange())
       printDbgRecordLine(DR);
     printInstructionLine(I);
   }
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 673e2f68249c..7ead7ce3bf08 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -52,11 +52,11 @@ DPMarker *BasicBlock::createMarker(InstListType::iterator It) {
          "Tried to create a marker in a non new debug-info block!");
   if (It != end())
     return createMarker(&*It);
-  DPMarker *DPM = getTrailingDPValues();
+  DPMarker *DPM = getTrailingDbgRecords();
   if (DPM)
     return DPM;
   DPM = new DPMarker();
-  setTrailingDPValues(DPM);
+  setTrailingDbgRecords(DPM);
   return DPM;
 }
 
@@ -91,7 +91,7 @@ void BasicBlock::convertToNewDbgValues() {
     DPMarker *Marker = I.DbgMarker;
 
     for (DbgRecord *DPV : DPVals)
-      Marker->insertDPValue(DPV, false);
+      Marker->insertDbgRecord(DPV, false);
 
     DPVals.clear();
   }
@@ -109,7 +109,7 @@ void BasicBlock::convertFromNewDbgValues() {
       continue;
 
     DPMarker &Marker = *Inst.DbgMarker;
-    for (DbgRecord &DR : Marker.getDbgValueRange())
+    for (DbgRecord &DR : Marker.getDbgRecordRange())
       InstList.insert(Inst.getIterator(),
                       DR.createDebugIntrinsic(getModule(), nullptr));
 
@@ -119,7 +119,7 @@ void BasicBlock::convertFromNewDbgValues() {
   // Assume no trailing DPValues: we could technically create them at the end
   // of the block, after a terminator, but this would be non-cannonical and
   // indicates that something else is broken somewhere.
-  assert(!getTrailingDPValues());
+  assert(!getTrailingDbgRecords());
 }
 
 #ifndef NDEBUG
@@ -711,7 +711,7 @@ void BasicBlock::flushTerminatorDbgValues() {
     return;
 
   // Are there any dangling DPValues?
-  DPMarker *TrailingDPValues = getTrailingDPValues();
+  DPMarker *TrailingDPValues = getTrailingDbgRecords();
   if (!TrailingDPValues)
     return;
 
@@ -719,7 +719,7 @@ void BasicBlock::flushTerminatorDbgValues() {
   createMarker(Term);
   Term->DbgMarker->absorbDebugValues(*TrailingDPValues, false);
   TrailingDPValues->eraseFromParent();
-  deleteTrailingDPValues();
+  deleteTrailingDbgRecords();
 }
 
 void BasicBlock::spliceDebugInfoEmptyBlock(BasicBlock::iterator Dest,
@@ -754,13 +754,13 @@ void BasicBlock::spliceDebugInfoEmptyBlock(BasicBlock::iterator Dest,
   // occur when a block is optimised away and the terminator has been moved
   // somewhere else.
   if (Src->empty()) {
-    DPMarker *SrcTrailingDPValues = Src->getTrailingDPValues();
+    DPMarker *SrcTrailingDPValues = Src->getTrailingDbgRecords();
     if (!SrcTrailingDPValues)
       return;
 
-    Dest->adoptDbgValues(Src, Src->end(), InsertAtHead);
-    // adoptDbgValues should have released the trailing DPValues.
-    assert(!Src->getTrailingDPValues());
+    Dest->adoptDbgRecords(Src, Src->end(), InsertAtHead);
+    // adoptDbgRecords should have released the trailing DPValues.
+    assert(!Src->getTrailingDbgRecords());
     return;
   }
 
@@ -771,7 +771,7 @@ void BasicBlock::spliceDebugInfoEmptyBlock(BasicBlock::iterator Dest,
     return;
 
   // Is there actually anything to transfer?
-  if (!First->hasDbgValues())
+  if (!First->hasDbgRecords())
     return;
 
   createMarker(Dest)->absorbDebugValues(*First->DbgMarker, InsertAtHead);
@@ -817,16 +817,16 @@ void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src,
   // move the DPValues onto "First". They'll then be moved naturally in the
   // splice process.
   DPMarker *MoreDanglingDPValues = nullptr;
-  DPMarker *OurTrailingDPValues = getTrailingDPValues();
+  DPMarker *OurTrailingDPValues = getTrailingDbgRecords();
   if (Dest == end() && !Dest.getHeadBit() && OurTrailingDPValues) {
     // Are the "+" DPValues not supposed to move? If so, detach them
     // temporarily.
-    if (!First.getHeadBit() && First->hasDbgValues()) {
+    if (!First.getHeadBit() && First->hasDbgRecords()) {
       MoreDanglingDPValues = Src->getMarker(First);
       MoreDanglingDPValues->removeFromParent();
     }
 
-    if (First->hasDbgValues()) {
+    if (First->hasDbgRecords()) {
       // Place them at the front, it would look like this:
       //            Dest
       //              |
@@ -834,7 +834,7 @@ void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src,
       // Src-block: ~~~~~~~~++++B---B---B---B:::C
       //                        |               |
       //                       First           Last
-      First->adoptDbgValues(this, end(), true);
+      First->adoptDbgRecords(this, end(), true);
     } else {
       // No current marker, create one and absorb in. (FIXME: we can avoid an
       // allocation in the future).
@@ -842,7 +842,7 @@ void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src,
       CurMarker->absorbDebugValues(*OurTrailingDPValues, false);
       OurTrailingDPValues->eraseFromParent();
     }
-    deleteTrailingDPValues();
+    deleteTrailingDbgRecords();
     First.setHeadBit(true);
   }
 
@@ -854,7 +854,7 @@ void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src,
   if (!MoreDanglingDPValues)
     return;
 
-  // FIXME: we could avoid an allocation here sometimes. (adoptDbgValues
+  // FIXME: we could avoid an allocation here sometimes. (adoptDbgRecords
   // requires an iterator).
   DPMarker *LastMarker = Src->createMarker(Last);
   LastMarker->absorbDebugValues(*MoreDanglingDPValues, true);
@@ -946,11 +946,11 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
   if (ReadFromTail && Src->getMarker(Last)) {
     DPMarker *FromLast = Src->getMarker(Last);
     if (LastIsEnd) {
-      Dest->adoptDbgValues(Src, Last, true);
-      // adoptDbgValues will release any trailers.
-      assert(!Src->getTrailingDPValues());
+      Dest->adoptDbgRecords(Src, Last, true);
+      // adoptDbgRecords will release any trailers.
+      assert(!Src->getTrailingDbgRecords());
     } else {
-      // FIXME: can we use adoptDbgValues here to reduce allocations?
+      // FIXME: can we use adoptDbgRecords here to reduce allocations?
       DPMarker *OntoDest = createMarker(Dest);
       OntoDest->absorbDebugValues(*FromLast, true);
     }
@@ -959,9 +959,9 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
   // If we're _not_ reading from the head of First, i.e. the "++++" DPValues,
   // move their markers onto Last. They remain in the Src block. No action
   // needed.
-  if (!ReadFromHead && First->hasDbgValues()) {
+  if (!ReadFromHead && First->hasDbgRecords()) {
     if (Last != Src->end()) {
-      Last->adoptDbgValues(Src, First, true);
+      Last->adoptDbgRecords(Src, First, true);
     } else {
       DPMarker *OntoLast = Src->createMarker(Last);
       DPMarker *FromFirst = Src->createMarker(First);
@@ -990,11 +990,11 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
     // any trailing debug-info at the end of the block would "normally" have
     // been pushed in front of "First". Move it there now.
     DPMarker *FirstMarker = getMarker(First);
-    DPMarker *TrailingDPValues = getTrailingDPValues();
+    DPMarker *TrailingDPValues = getTrailingDbgRecords();
     if (TrailingDPValues) {
       FirstMarker->absorbDebugValues(*TrailingDPValues, true);
       TrailingDPValues->eraseFromParent();
-      deleteTrailingDPValues();
+      deleteTrailingDbgRecords();
     }
   }
 }
@@ -1027,21 +1027,21 @@ void BasicBlock::splice(iterator Dest, BasicBlock *Src, iterator First,
   flushTerminatorDbgValues();
 }
 
-void BasicBlock::insertDPValueAfter(DbgRecord *DPV, Instruction *I) {
+void BasicBlock::insertDbgRecordAfter(DbgRecord *DPV, Instruction *I) {
   assert(IsNewDbgInfoFormat);
   assert(I->getParent() == this);
 
   iterator NextIt = std::next(I->getIterator());
   DPMarker *NextMarker = createMarker(NextIt);
-  NextMarker->insertDPValue(DPV, true);
+  NextMarker->insertDbgRecord(DPV, true);
 }
 
-void BasicBlock::insertDPValueBefore(DbgRecord *DPV,
-                                     InstListType::iterator Where) {
+void BasicBlock::insertDbgRecordBefore(DbgRecord *DPV,
+                                       InstListType::iterator Where) {
   assert(Where == end() || Where->getParent() == this);
   bool InsertAtHead = Where.getHeadBit();
   DPMarker *M = createMarker(Where);
-  M->insertDPValue(DPV, InsertAtHead);
+  M->insertDbgRecord(DPV, InsertAtHead);
 }
 
 DPMarker *BasicBlock::getNextMarker(Instruction *I) {
@@ -1050,13 +1050,13 @@ DPMarker *BasicBlock::getNextMarker(Instruction *I) {
 
 DPMarker *BasicBlock::getMarker(InstListType::iterator It) {
   if (It == end()) {
-    DPMarker *DPM = getTrailingDPValues();
+    DPMarker *DPM = getTrailingDbgRecords();
     return DPM;
   }
   return It->DbgMarker;
 }
 
-void BasicBlock::reinsertInstInDPValues(
+void BasicBlock::reinsertInstInDbgRecords(
     Instruction *I, std::optional<DPValue::self_iterator> Pos) {
   // "I" was originally removed from a position where it was
   // immediately in front of Pos. Any DPValues on that position then "fell down"
@@ -1123,15 +1123,14 @@ void BasicBlock::validateInstrOrdering() const {
 }
 #endif
 
-void BasicBlock::setTrailingDPValues(DPMarker *foo) {
-  getContext().pImpl->setTrailingDPValues(this, foo);
+void BasicBlock::setTrailingDbgRecords(DPMarker *foo) {
+  getContext().pImpl->setTrailingDbgRecords(this, foo);
 }
 
-DPMarker *BasicBlock::getTrailingDPValues() {
-  return getContext().pImpl->getTrailingDPValues(this);
+DPMarker *BasicBlock::getTrailingDbgRecords() {
+  return getContext().pImpl->getTrailingDbgRecords(this);
 }
 
-void BasicBlock::deleteTrailingDPValues() {
-  getContext().pImpl->deleteTrailingDPValues(this);
+void BasicBlock::deleteTrailingDbgRecords() {
+  getContext().pImpl->deleteTrailingDbgRecords(this);
 }
-
diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp
index c0643f63c972..c673abd8bc30 100644
--- a/llvm/lib/IR/DIBuilder.cpp
+++ b/llvm/lib/IR/DIBuilder.cpp
@@ -1095,7 +1095,7 @@ void DIBuilder::insertDPValue(DPValue *DPV, BasicBlock *InsertBB,
   else if (InsertBB)
     InsertPt = InsertBB->end();
   InsertPt.setHeadBit(InsertAtHead);
-  InsertBB->insertDPValueBefore(DPV, InsertPt);
+  InsertBB->insertDbgRecordBefore(DPV, InsertPt);
 }
 
 Instruction *DIBuilder::insertDbgIntrinsic(llvm::Function *IntrinsicFn,
@@ -1137,9 +1137,9 @@ DbgInstPtr DIBuilder::insertLabel(DILabel *LabelInfo, const DILocation *DL,
   if (M.IsNewDbgInfoFormat) {
     DPLabel *DPL = new DPLabel(LabelInfo, DL);
     if (InsertBB && InsertBefore)
-      InsertBB->insertDPValueBefore(DPL, InsertBefore->getIterator());
+      InsertBB->insertDbgRecordBefore(DPL, InsertBefore->getIterator());
     else if (InsertBB)
-      InsertBB->insertDPValueBefore(DPL, InsertBB->end());
+      InsertBB->insertDbgRecordBefore(DPL, InsertBB->end());
     return DPL;
   }
 
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 68fd244e2569..e63b1e67dad7 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -241,7 +241,7 @@ void DebugInfoFinder::processInstruction(const Module &M,
   if (auto DbgLoc = I.getDebugLoc())
     processLocation(M, DbgLoc.get());
 
-  for (const DbgRecord &DPR : I.getDbgValueRange())
+  for (const DbgRecord &DPR : I.getDbgRecordRange())
     processDbgRecord(M, DPR);
 }
 
@@ -579,7 +579,7 @@ bool llvm::stripDebugInfo(Function &F) {
         // DIAssignID are debug info metadata primitives.
         I.setMetadata(LLVMContext::MD_DIAssignID, nullptr);
       }
-      I.dropDbgValues();
+      I.dropDbgRecords();
     }
   }
   return Changed;
@@ -896,7 +896,7 @@ bool llvm::stripNonLineTableDebugInfo(Module &M) {
           I.setMetadata("heapallocsite", nullptr);
 
         // Strip any DPValues attached.
-        I.dropDbgValues();
+        I.dropDbgRecords();
       }
     }
   }
@@ -1828,7 +1828,7 @@ void at::deleteAll(Function *F) {
   SmallVector<DPValue *, 12> DPToDelete;
   for (BasicBlock &BB : *F) {
     for (Instruction &I : BB) {
-      for (DPValue &DPV : DPValue::filter(I.getDbgValueRange()))
+      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
         if (DPV.isDbgAssign())
           DPToDelete.push_back(&DPV);
       if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(&I))
@@ -2257,7 +2257,7 @@ bool AssignmentTrackingPass::runOnFunction(Function &F) {
   };
   for (auto &BB : F) {
     for (auto &I : BB) {
-      for (DPValue &DPV : DPValue::filter(I.getDbgValueRange())) {
+      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
         if (DPV.isDbgDeclare())
           ProcessDeclare(&DPV, DPVDeclares);
       }
diff --git a/llvm/lib/IR/DebugProgramInstruction.cpp b/llvm/lib/IR/DebugProgramInstruction.cpp
index 5ff1e8c19db6..019b00c2e208 100644
--- a/llvm/lib/IR/DebugProgramInstruction.cpp
+++ b/llvm/lib/IR/DebugProgramInstruction.cpp
@@ -218,7 +218,7 @@ DPValue *DPValue::createLinkedDPVAssign(Instruction *LinkedInstr, Value *Val,
   auto *NewDPVAssign = DPValue::createDPVAssign(Val, Variable, Expression,
                                                 cast<DIAssignID>(Link), Address,
                                                 AddressExpression, DI);
-  LinkedInstr->getParent()->insertDPValueAfter(NewDPVAssign, LinkedInstr);
+  LinkedInstr->getParent()->insertDbgRecordAfter(NewDPVAssign, LinkedInstr);
   return NewDPVAssign;
 }
 
@@ -515,7 +515,7 @@ void DbgRecord::insertBefore(DbgRecord *InsertBefore) {
   assert(InsertBefore->getMarker() &&
          "Cannot insert a DbgRecord before a DbgRecord that does not have a "
          "DPMarker!");
-  InsertBefore->getMarker()->insertDPValue(this, InsertBefore);
+  InsertBefore->getMarker()->insertDbgRecord(this, InsertBefore);
 }
 void DbgRecord::insertAfter(DbgRecord *InsertAfter) {
   assert(!getMarker() &&
@@ -523,7 +523,7 @@ void DbgRecord::insertAfter(DbgRecord *InsertAfter) {
   assert(InsertAfter->getMarker() &&
          "Cannot insert a DbgRecord after a DbgRecord that does not have a "
          "DPMarker!");
-  InsertAfter->getMarker()->insertDPValueAfter(this, InsertAfter);
+  InsertAfter->getMarker()->insertDbgRecordAfter(this, InsertAfter);
 }
 void DbgRecord::moveBefore(DbgRecord *MoveBefore) {
   assert(getMarker() &&
@@ -544,7 +544,7 @@ void DbgRecord::moveAfter(DbgRecord *MoveAfter) {
 // DPValues.
 DPMarker DPMarker::EmptyDPMarker;
 
-void DPMarker::dropDbgValues() {
+void DPMarker::dropDbgRecords() {
   while (!StoredDPValues.empty()) {
     auto It = StoredDPValues.begin();
     DbgRecord *DR = &*It;
@@ -553,7 +553,7 @@ void DPMarker::dropDbgValues() {
   }
 }
 
-void DPMarker::dropOneDbgValue(DbgRecord *DR) {
+void DPMarker::dropOneDbgRecord(DbgRecord *DR) {
   assert(DR->getMarker() == this);
   StoredDPValues.erase(DR->getIterator());
   DR->deleteRecord();
@@ -587,7 +587,7 @@ void DPMarker::removeMarker() {
     // marker becomes the trailing marker of a degenerate block.
     BasicBlock::iterator NextIt = std::next(Owner->getIterator());
     if (NextIt == getParent()->end()) {
-      getParent()->setTrailingDPValues(this);
+      getParent()->setTrailingDbgRecords(this);
       MarkedInstr = nullptr;
     } else {
       NextIt->DbgMarker = this;
@@ -605,15 +605,15 @@ void DPMarker::removeFromParent() {
 void DPMarker::eraseFromParent() {
   if (MarkedInstr)
     removeFromParent();
-  dropDbgValues();
+  dropDbgRecords();
   delete this;
 }
 
-iterator_range<DbgRecord::self_iterator> DPMarker::getDbgValueRange() {
+iterator_range<DbgRecord::self_iterator> DPMarker::getDbgRecordRange() {
   return make_range(StoredDPValues.begin(), StoredDPValues.end());
 }
 iterator_range<DbgRecord::const_self_iterator>
-DPMarker::getDbgValueRange() const {
+DPMarker::getDbgRecordRange() const {
   return make_range(StoredDPValues.begin(), StoredDPValues.end());
 }
 
@@ -627,18 +627,18 @@ void DbgRecord::eraseFromParent() {
   deleteRecord();
 }
 
-void DPMarker::insertDPValue(DbgRecord *New, bool InsertAtHead) {
+void DPMarker::insertDbgRecord(DbgRecord *New, bool InsertAtHead) {
   auto It = InsertAtHead ? StoredDPValues.begin() : StoredDPValues.end();
   StoredDPValues.insert(It, *New);
   New->setMarker(this);
 }
-void DPMarker::insertDPValue(DbgRecord *New, DbgRecord *InsertBefore) {
+void DPMarker::insertDbgRecord(DbgRecord *New, DbgRecord *InsertBefore) {
   assert(InsertBefore->getMarker() == this &&
          "DPValue 'InsertBefore' must be contained in this DPMarker!");
   StoredDPValues.insert(InsertBefore->getIterator(), *New);
   New->setMarker(this);
 }
-void DPMarker::insertDPValueAfter(DbgRecord *New, DbgRecord *InsertAfter) {
+void DPMarker::insertDbgRecordAfter(DbgRecord *New, DbgRecord *InsertAfter) {
   assert(InsertAfter->getMarker() == this &&
          "DPValue 'InsertAfter' must be contained in this DPMarker!");
   StoredDPValues.insert(++(InsertAfter->getIterator()), *New);
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 6b8c6e0c85ed..e0892398f434 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -161,7 +161,7 @@ void Instruction::insertBefore(BasicBlock &BB,
       // maintenence code that you intend the PHI to be ahead of everything,
       // including any debug-info.
       assert(!isa<PHINode>(this) && "Inserting PHI after debug-records!");
-      adoptDbgValues(&BB, InsertPos, false);
+      adoptDbgRecords(&BB, InsertPos, false);
     }
   }
 
@@ -232,7 +232,7 @@ void Instruction::moveBeforeImpl(BasicBlock &BB, InstListType::iterator I,
     // If we're inserting at point I, and not in front of the DPValues attached
     // there, then we should absorb the DPValues attached to I.
     if (!InsertAtHead && NextMarker && !NextMarker->empty()) {
-      adoptDbgValues(&BB, I, false);
+      adoptDbgRecords(&BB, I, false);
     }
   }
 
@@ -244,7 +244,7 @@ iterator_range<DbgRecord::self_iterator> Instruction::cloneDebugInfoFrom(
     const Instruction *From, std::optional<DbgRecord::self_iterator> FromHere,
     bool InsertAtHead) {
   if (!From->DbgMarker)
-    return DPMarker::getEmptyDPValueRange();
+    return DPMarker::getEmptyDbgRecordRange();
 
   assert(getParent()->IsNewDbgInfoFormat);
   assert(getParent()->IsNewDbgInfoFormat ==
@@ -270,15 +270,15 @@ Instruction::getDbgReinsertionPosition() {
   return NextMarker->StoredDPValues.begin();
 }
 
-bool Instruction::hasDbgValues() const { return !getDbgValueRange().empty(); }
+bool Instruction::hasDbgRecords() const { return !getDbgRecordRange().empty(); }
 
-void Instruction::adoptDbgValues(BasicBlock *BB, BasicBlock::iterator It,
-                                 bool InsertAtHead) {
+void Instruction::adoptDbgRecords(BasicBlock *BB, BasicBlock::iterator It,
+                                  bool InsertAtHead) {
   DPMarker *SrcMarker = BB->getMarker(It);
   auto ReleaseTrailingDPValues = [BB, It, SrcMarker]() {
     if (BB->end() == It) {
       SrcMarker->eraseFromParent();
-      BB->deleteTrailingDPValues();
+      BB->deleteTrailingDbgRecords();
     }
   };
 
@@ -314,13 +314,13 @@ void Instruction::adoptDbgValues(BasicBlock *BB, BasicBlock::iterator It,
   }
 }
 
-void Instruction::dropDbgValues() {
+void Instruction::dropDbgRecords() {
   if (DbgMarker)
-    DbgMarker->dropDbgValues();
+    DbgMarker->dropDbgRecords();
 }
 
-void Instruction::dropOneDbgValue(DbgRecord *DPV) {
-  DbgMarker->dropOneDbgValue(DPV);
+void Instruction::dropOneDbgRecord(DbgRecord *DPV) {
+  DbgMarker->dropOneDbgRecord(DPV);
 }
 
 bool Instruction::comesBefore(const Instruction *Other) const {
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 547a02a6490e..c841b28ca438 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -1687,18 +1687,16 @@ public:
   SmallDenseMap<BasicBlock *, DPMarker *> TrailingDPValues;
 
   // Set, get and delete operations for TrailingDPValues.
-  void setTrailingDPValues(BasicBlock *B, DPMarker *M) {
+  void setTrailingDbgRecords(BasicBlock *B, DPMarker *M) {
     assert(!TrailingDPValues.count(B));
     TrailingDPValues[B] = M;
   }
 
-  DPMarker *getTrailingDPValues(BasicBlock *B) {
+  DPMarker *getTrailingDbgRecords(BasicBlock *B) {
     return TrailingDPValues.lookup(B);
   }
 
-  void deleteTrailingDPValues(BasicBlock *B) {
-    TrailingDPValues.erase(B);
-  }
+  void deleteTrailingDbgRecords(BasicBlock *B) { TrailingDPValues.erase(B); }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 0e6c01802cfb..2b9dc745d7bf 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -682,9 +682,9 @@ void Verifier::visitDbgRecords(Instruction &I) {
     return;
   CheckDI(I.DbgMarker->MarkedInstr == &I, "Instruction has invalid DbgMarker",
           &I);
-  CheckDI(!isa<PHINode>(&I) || !I.hasDbgValues(),
+  CheckDI(!isa<PHINode>(&I) || !I.hasDbgRecords(),
           "PHI Node must not have any attached DbgRecords", &I);
-  for (DbgRecord &DR : I.getDbgValueRange()) {
+  for (DbgRecord &DR : I.getDbgRecordRange()) {
     CheckDI(DR.getMarker() == I.DbgMarker, "DbgRecord had invalid DbgMarker",
             &I, &DR);
     if (auto *Loc =
@@ -3046,7 +3046,7 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
 
   // Confirm that no issues arise from the debug program.
   if (BB.IsNewDbgInfoFormat)
-    CheckDI(!BB.getTrailingDPValues(), "Basic Block has trailing DbgRecords!",
+    CheckDI(!BB.getTrailingDbgRecords(), "Basic Block has trailing DbgRecords!",
             &BB);
 }
 
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index e091ecbf5400..7c29d443df51 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -1277,7 +1277,7 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
                                   FrameDIVar, DBuilder.createExpression(),
                                   DILoc, DPValue::LocationType::Declare);
     BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
-    It->getParent()->insertDPValueBefore(NewDPV, It);
+    It->getParent()->insertDbgRecordBefore(NewDPV, It);
   } else {
     DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
                            DBuilder.createExpression(), DILoc,
@@ -1891,7 +1891,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
                 new DPValue(ValueAsMetadata::get(CurrentReload),
                             DDI->getVariable(), DDI->getExpression(),
                             DDI->getDebugLoc(), DPValue::LocationType::Declare);
-            Builder.GetInsertPoint()->getParent()->insertDPValueBefore(
+            Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore(
                 NewDPV, Builder.GetInsertPoint());
           } else {
             DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
@@ -1925,7 +1925,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
       U->replaceUsesOfWith(Def, CurrentReload);
       // Instructions are added to Def's user list if the attached
       // debug records use Def. Update those now.
-      for (DPValue &DPV : DPValue::filter(U->getDbgValueRange()))
+      for (DPValue &DPV : DPValue::filter(U->getDbgRecordRange()))
         DPV.replaceVariableLocationOp(Def, CurrentReload, true);
     }
   }
@@ -2996,7 +2996,7 @@ void coro::salvageDebugInfo(
       InsertPt = F->getEntryBlock().begin();
     if (InsertPt) {
       DPV.removeFromParent();
-      (*InsertPt)->getParent()->insertDPValueBefore(&DPV, *InsertPt);
+      (*InsertPt)->getParent()->insertDbgRecordBefore(&DPV, *InsertPt);
     }
   }
 }
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 58b95e43b899..086971a1f213 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -684,7 +684,7 @@ collectDbgVariableIntrinsics(Function &F) {
   SmallVector<DbgVariableIntrinsic *, 8> Intrinsics;
   SmallVector<DPValue *> DPValues;
   for (auto &I : instructions(F)) {
-    for (DPValue &DPV : DPValue::filter(I.getDbgValueRange()))
+    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
       DPValues.push_back(&DPV);
     if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
       Intrinsics.push_back(DVI);
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index 03d4d503b80a..37f4a8749fd8 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -725,7 +725,7 @@ static void moveFunctionData(Function &Old, Function &New,
       // program, it will cause incorrect reporting from a debugger if we keep
       // the same debug instructions. Drop non-intrinsic DPValues here,
       // collect intrinsics for removal later.
-      Val.dropDbgValues();
+      Val.dropDbgRecords();
 
       // We must handle the scoping of called functions differently than
       // other outlined instructions.
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 591be6be092c..ed5352e7d04a 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -643,7 +643,7 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
        BI != BIE; ++BI) {
     // Examine DPValues as they happen "before" the instruction. Are they
     // connected to parameters?
-    for (DPValue &DPV : DPValue::filter(BI->getDbgValueRange())) {
+    for (DPValue &DPV : DPValue::filter(BI->getDbgRecordRange())) {
       if (DPV.isDbgValue() || DPV.isDbgAssign()) {
         ExamineDbgValue(&DPV, PDPVRelated);
       } else {
@@ -686,7 +686,7 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
 
   // Collect the set of unrelated instructions and debug records.
   for (Instruction &I : *GEntryBlock) {
-    for (DPValue &DPV : DPValue::filter(I.getDbgValueRange()))
+    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
       IsPDIRelated(&DPV, PDPVRelated, PDPVUnrelatedWL);
     IsPDIRelated(&I, PDIRelated, PDIUnrelatedWL);
   }
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 1a831805dc72..1688005de210 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3426,7 +3426,7 @@ void InstCombinerImpl::handleUnreachableFrom(
     if (Inst.isEHPad() || Inst.getType()->isTokenTy())
       continue;
     // RemoveDIs: erase debug-info on this instruction manually.
-    Inst.dropDbgValues();
+    Inst.dropDbgRecords();
     eraseInstFromFunction(Inst);
     MadeIRChange = true;
   }
@@ -4697,7 +4697,7 @@ void InstCombinerImpl::tryToSinkInstructionDPValues(
     // latest assignment.
     for (const Instruction *Inst : DupSet) {
       for (DPValue &DPV :
-           llvm::reverse(DPValue::filter(Inst->getDbgValueRange()))) {
+           llvm::reverse(DPValue::filter(Inst->getDbgRecordRange()))) {
         DebugVariable DbgUserVariable =
             DebugVariable(DPV.getVariable(), DPV.getExpression(),
                           DPV.getDebugLoc()->getInlinedAt());
@@ -4762,7 +4762,7 @@ void InstCombinerImpl::tryToSinkInstructionDPValues(
   //   InsertPtInst
   assert(InsertPos.getHeadBit());
   for (DPValue *DPVClone : DPVClones) {
-    InsertPos->getParent()->insertDPValueBefore(DPVClone, InsertPos);
+    InsertPos->getParent()->insertDbgRecordBefore(DPVClone, InsertPos);
     LLVM_DEBUG(dbgs() << "SINK: " << *DPVClone << '\n');
   }
 }
diff --git a/llvm/lib/Transforms/Scalar/ADCE.cpp b/llvm/lib/Transforms/Scalar/ADCE.cpp
index 95a9527126c1..4d901310efe5 100644
--- a/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -548,7 +548,7 @@ ADCEChanged AggressiveDeadCodeElimination::removeDeadInstructions() {
     // attached to this instruction, and drop any for scopes that aren't alive,
     // like the rest of this loop does. Extending support to assignment tracking
     // is future work.
-    for (DbgRecord &DR : make_early_inc_range(I.getDbgValueRange())) {
+    for (DbgRecord &DR : make_early_inc_range(I.getDbgRecordRange())) {
       // Avoid removing a DPV that is linked to instructions because it holds
       // information about an existing store.
       if (DPValue *DPV = dyn_cast<DPValue>(&DR); DPV && DPV->isDbgAssign())
@@ -556,7 +556,7 @@ ADCEChanged AggressiveDeadCodeElimination::removeDeadInstructions() {
           continue;
       if (AliveScopes.count(DR.getDebugLoc()->getScope()))
         continue;
-      I.dropOneDbgValue(&DR);
+      I.dropOneDbgRecord(&DR);
     }
 
     // Check if the instruction is alive.
diff --git a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 47f663fa0cf0..b8571ba07489 100644
--- a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -403,7 +403,7 @@ static void splitCallSite(CallBase &CB,
       NewPN->insertBefore(*TailBB, TailBB->begin());
       CurrentI->replaceAllUsesWith(NewPN);
     }
-    CurrentI->dropDbgValues();
+    CurrentI->dropDbgRecords();
     CurrentI->eraseFromParent();
     // We are done once we handled the first original instruction in TailBB.
     if (CurrentI == OriginalBeginInst)
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 221b122caba2..1058a015017e 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -401,7 +401,7 @@ static bool replaceFoldableUses(Instruction *Cond, Value *ToVal,
     Changed |= replaceNonLocalUsesWith(Cond, ToVal);
   for (Instruction &I : reverse(*KnownAtEndOfBB)) {
     // Replace any debug-info record users of Cond with ToVal.
-    for (DPValue &DPV : DPValue::filter(I.getDbgValueRange()))
+    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange()))
       DPV.replaceVariableLocationOp(Cond, ToVal, true);
 
     // Reached the Cond whose uses we are trying to replace, so there are no
@@ -2111,7 +2111,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
 
   // There may be DPValues on the terminator, clone directly from marker
   // to marker as there isn't an instruction there.
-  if (BE != RangeBB->end() && BE->hasDbgValues()) {
+  if (BE != RangeBB->end() && BE->hasDbgRecords()) {
     // Dump them at the end.
     DPMarker *Marker = RangeBB->getMarker(BE);
     DPMarker *EndMarker = NewBB->createMarker(NewBB->end());
@@ -3118,7 +3118,7 @@ bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
       NewPN->insertBefore(InsertionPoint);
       Inst->replaceAllUsesWith(NewPN);
     }
-    Inst->dropDbgValues();
+    Inst->dropDbgRecords();
     Inst->eraseFromParent();
   }
   return true;
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4238098181af..8b078ddc4e74 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6711,7 +6711,7 @@ static void DbgGatherSalvagableDVI(
         SalvageableDVISCEVs.push_back(std::move(NewRec));
         return true;
       };
-      for (DPValue &DPV : DPValue::filter(I.getDbgValueRange())) {
+      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
         if (DPV.isDbgValue() || DPV.isDbgAssign())
           ProcessDbgValue(&DPV);
       }
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 190fee11618b..e238f311a15b 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -5041,8 +5041,8 @@ static void insertNewDbgInst(DIBuilder &DIB, DPValue *Orig, AllocaInst *NewAddr,
   if (Orig->isDbgDeclare()) {
     DPValue *DPV = DPValue::createDPVDeclare(
         NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
-    BeforeInst->getParent()->insertDPValueBefore(DPV,
-                                                 BeforeInst->getIterator());
+    BeforeInst->getParent()->insertDbgRecordBefore(DPV,
+                                                   BeforeInst->getIterator());
     return;
   }
   if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 3d146086d31a..9fcaf2a89e56 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -1260,7 +1260,7 @@ static BasicBlock *buildClonedLoopBlocks(
   Module *M = ClonedPH->getParent()->getParent();
   for (auto *ClonedBB : NewBlocks)
     for (Instruction &I : *ClonedBB) {
-      RemapDPValueRange(M, I.getDbgValueRange(), VMap,
+      RemapDPValueRange(M, I.getDbgRecordRange(), VMap,
                         RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
       RemapInstruction(&I, VMap,
                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index 260f31b59ed2..8686570dfd2f 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -293,7 +293,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
   for (const auto &I : FromBlock) {
     // Make note of any DPValues that need hoisting. DPLabels
     // get left behind just like llvm.dbg.labels.
-    for (DPValue &DPV : DPValue::filter(I.getDbgValueRange())) {
+    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
       if (HasNoUnhoistedInstr(DPV.location_ops()))
         DPValuesToHoist[DPV.getInstruction()].push_back(&DPV);
     }
@@ -320,8 +320,8 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
     if (DPValuesToHoist.contains(&*I)) {
       for (auto *DPV : DPValuesToHoist[&*I]) {
         DPV->removeFromParent();
-        ToBlock.insertDPValueBefore(DPV,
-                                    ToBlock.getTerminator()->getIterator());
+        ToBlock.insertDbgRecordBefore(DPV,
+                                      ToBlock.getTerminator()->getIterator());
       }
     }
     // We have to increment I before moving Current as moving Current
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 5aa59acfa6df..2006b40e26d0 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -386,7 +386,7 @@ static bool DPValuesRemoveRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) {
   SmallVector<DPValue *, 8> ToBeRemoved;
   SmallDenseSet<DebugVariable> VariableSet;
   for (auto &I : reverse(*BB)) {
-    for (DbgRecord &DR : reverse(I.getDbgValueRange())) {
+    for (DbgRecord &DR : reverse(I.getDbgRecordRange())) {
       if (isa<DPLabel>(DR)) {
         // Emulate existing behaviour (see comment below for dbg.declares).
         // FIXME: Don't do this.
@@ -504,7 +504,7 @@ static bool DPValuesRemoveRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
   DenseMap<DebugVariable, std::pair<SmallVector<Value *, 4>, DIExpression *>>
       VariableMap;
   for (auto &I : *BB) {
-    for (DPValue &DPV : DPValue::filter(I.getDbgValueRange())) {
+    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
       if (DPV.getType() == DPValue::LocationType::Declare)
         continue;
       DebugVariable Key(DPV.getVariable(), std::nullopt,
@@ -553,7 +553,7 @@ static bool DPValuesRemoveUndefDbgAssignsFromEntryBlock(BasicBlock *BB) {
   // Remove undef dbg.assign intrinsics that are encountered before
   // any non-undef intrinsics from the entry block.
   for (auto &I : *BB) {
-    for (DPValue &DPV : DPValue::filter(I.getDbgValueRange())) {
+    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
       if (!DPV.isDbgValue() && !DPV.isDbgAssign())
         continue;
       bool IsDbgValueKind =
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index c0f333364fa5..6931d1997aa6 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -276,7 +276,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
     // attached debug-info records.
     for (Instruction &II : *BB) {
       RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer);
-      RemapDPValueRange(II.getModule(), II.getDbgValueRange(), VMap, RemapFlag,
+      RemapDPValueRange(II.getModule(), II.getDbgRecordRange(), VMap, RemapFlag,
                         TypeMapper, Materializer);
     }
 
@@ -889,7 +889,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
   Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
   for (BasicBlock &BB : make_range(Begin, NewFunc->end())) {
     for (Instruction &I : BB) {
-      RemapDPValueRange(I.getModule(), I.getDbgValueRange(), VMap,
+      RemapDPValueRange(I.getModule(), I.getDbgRecordRange(), VMap,
                         ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                         TypeMapper, Materializer);
     }
@@ -990,7 +990,7 @@ void llvm::remapInstructionsInBlocks(ArrayRef<BasicBlock *> Blocks,
   // Rewrite the code to refer to itself.
   for (auto *BB : Blocks) {
     for (auto &Inst : *BB) {
-      RemapDPValueRange(Inst.getModule(), Inst.getDbgValueRange(), VMap,
+      RemapDPValueRange(Inst.getModule(), Inst.getDbgRecordRange(), VMap,
                         RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
       RemapInstruction(&Inst, VMap,
                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index ab2d25c3f17c..0fac5fc02e3f 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1602,7 +1602,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
   };
 
   auto UpdateDbgRecordsOnInst = [&](Instruction &I) -> void {
-    for (DbgRecord &DR : I.getDbgValueRange()) {
+    for (DbgRecord &DR : I.getDbgRecordRange()) {
       if (DPLabel *DPL = dyn_cast<DPLabel>(&DR)) {
         UpdateDbgLabel(DPL);
         continue;
@@ -1659,7 +1659,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
   for (auto *DII : DebugIntrinsicsToDelete)
     DII->eraseFromParent();
   for (auto *DPV : DPVsToDelete)
-    DPV->getMarker()->MarkedInstr->dropOneDbgValue(DPV);
+    DPV->getMarker()->MarkedInstr->dropOneDbgRecord(DPV);
   DIB.finalizeSubprogram(NewSP);
 
   // Fix up the scope information attached to the line locations in the new
@@ -1668,7 +1668,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
     if (const DebugLoc &DL = I.getDebugLoc())
       I.setDebugLoc(
           DebugLoc::replaceInlinedAtSubprogram(DL, *NewSP, Ctx, Cache));
-    for (DbgRecord &DR : I.getDbgValueRange())
+    for (DbgRecord &DR : I.getDbgRecordRange())
       DR.setDebugLoc(DebugLoc::replaceInlinedAtSubprogram(DR.getDebugLoc(),
                                                           *NewSP, Ctx, Cache));
 
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 0e8e72678de6..1bbe76a92187 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1728,7 +1728,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
     for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
          ++BI) {
       UpdateInst(*BI);
-      for (DbgRecord &DPV : BI->getDbgValueRange()) {
+      for (DbgRecord &DPV : BI->getDbgRecordRange()) {
         UpdateDPV(&DPV);
       }
     }
@@ -1741,7 +1741,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
           BI = BI->eraseFromParent();
           continue;
         } else {
-          BI->dropDbgValues();
+          BI->dropDbgRecords();
         }
         ++BI;
       }
@@ -1829,7 +1829,7 @@ static void fixupAssignments(Function::iterator Start, Function::iterator End) {
   // attachment or use, replace it with a new version.
   for (auto BBI = Start; BBI != End; ++BBI) {
     for (Instruction &I : *BBI) {
-      for (DPValue &DPV : DPValue::filter(I.getDbgValueRange())) {
+      for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
         if (DPV.isDbgAssign())
           DPV.setAssignId(GetNewID(DPV.getAssignID()));
       }
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index a44536e34c92..7b74caac9e08 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1657,7 +1657,7 @@ static void insertDbgValueOrDPValue(DIBuilder &Builder, Value *DV,
     // DPValue directly instead of a dbg.value intrinsic.
     ValueAsMetadata *DVAM = ValueAsMetadata::get(DV);
     DPValue *DV = new DPValue(DVAM, DIVar, DIExpr, NewLoc.get());
-    Instr->getParent()->insertDPValueBefore(DV, Instr);
+    Instr->getParent()->insertDbgRecordBefore(DV, Instr);
   }
 }
 
@@ -1675,7 +1675,7 @@ static void insertDbgValueOrDPValueAfter(DIBuilder &Builder, Value *DV,
     // DPValue directly instead of a dbg.value intrinsic.
     ValueAsMetadata *DVAM = ValueAsMetadata::get(DV);
     DPValue *DV = new DPValue(DVAM, DIVar, DIExpr, NewLoc.get());
-    Instr->getParent()->insertDPValueAfter(DV, &*Instr);
+    Instr->getParent()->insertDbgRecordAfter(DV, &*Instr);
   }
 }
 
@@ -1794,7 +1794,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DPValue *DPV, StoreInst *SI,
   DV = UndefValue::get(DV->getType());
   ValueAsMetadata *DVAM = ValueAsMetadata::get(DV);
   DPValue *NewDPV = new DPValue(DVAM, DIVar, DIExpr, NewLoc.get());
-  SI->getParent()->insertDPValueBefore(NewDPV, SI->getIterator());
+  SI->getParent()->insertDbgRecordBefore(NewDPV, SI->getIterator());
 }
 
 /// Inserts a llvm.dbg.value intrinsic after a phi that has an associated
@@ -1856,7 +1856,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DPValue *DPV, LoadInst *LI,
   // Create a DPValue directly and insert.
   ValueAsMetadata *LIVAM = ValueAsMetadata::get(LI);
   DPValue *DV = new DPValue(LIVAM, DIVar, DIExpr, NewLoc.get());
-  LI->getParent()->insertDPValueAfter(DV, LI);
+  LI->getParent()->insertDbgRecordAfter(DV, LI);
 }
 
 /// Determine whether this alloca is either a VLA or an array.
@@ -1911,7 +1911,7 @@ bool llvm::LowerDbgDeclare(Function &F) {
     for (Instruction &BI : FI) {
       if (auto *DDI = dyn_cast<DbgDeclareInst>(&BI))
         Dbgs.push_back(DDI);
-      for (DPValue &DPV : DPValue::filter(BI.getDbgValueRange())) {
+      for (DPValue &DPV : DPValue::filter(BI.getDbgRecordRange())) {
         if (DPV.getType() == DPValue::LocationType::Declare)
           DPVs.push_back(&DPV);
       }
@@ -1996,7 +1996,7 @@ static void insertDPValuesForPHIs(BasicBlock *BB,
   // Map existing PHI nodes to their DPValues.
   DenseMap<Value *, DPValue *> DbgValueMap;
   for (auto &I : *BB) {
-    for (DPValue &DPV : DPValue::filter(I.getDbgValueRange())) {
+    for (DPValue &DPV : DPValue::filter(I.getDbgRecordRange())) {
       for (Value *V : DPV.location_ops())
         if (auto *Loc = dyn_cast_or_null<PHINode>(V))
           DbgValueMap.insert({Loc, &DPV});
@@ -2044,7 +2044,7 @@ static void insertDPValuesForPHIs(BasicBlock *BB,
     auto InsertionPt = Parent->getFirstInsertionPt();
     assert(InsertionPt != Parent->end() && "Ill-formed basic block");
 
-    Parent->insertDPValueBefore(NewDbgII, InsertionPt);
+    Parent->insertDbgRecordBefore(NewDbgII, InsertionPt);
   }
 }
 
@@ -2620,7 +2620,7 @@ static bool rewriteDebugUsers(
         LLVM_DEBUG(dbgs() << "MOVE:  " << *DPV << '\n');
         DPV->removeFromParent();
         // Ensure there's a marker.
-        DomPoint.getParent()->insertDPValueAfter(DPV, &DomPoint);
+        DomPoint.getParent()->insertDbgRecordAfter(DPV, &DomPoint);
         Changed = true;
       } else if (!DT.dominates(&DomPoint, MarkedInstr)) {
         UndefOrSalvageDPV.insert(DPV);
@@ -2766,7 +2766,7 @@ bool llvm::handleUnreachableTerminator(
     Instruction *I, SmallVectorImpl<Value *> &PoisonedValues) {
   bool Changed = false;
   // RemoveDIs: erase debug-info on this instruction manually.
-  I->dropDbgValues();
+  I->dropDbgRecords();
   for (Use &U : I->operands()) {
     Value *Op = U.get();
     if (isa<Instruction>(Op) && !Op->getType()->isTokenTy()) {
@@ -2797,7 +2797,7 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
     if (Inst->isEHPad() || Inst->getType()->isTokenTy()) {
       // EHPads can't have DPValues attached to them, but it might be possible
       // for things with token type.
-      Inst->dropDbgValues();
+      Inst->dropDbgRecords();
       EndInst = Inst;
       continue;
     }
@@ -2806,7 +2806,7 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
     else
       ++NumDeadInst;
     // RemoveDIs: erasing debug-info must be done manually.
-    Inst->dropDbgValues();
+    Inst->dropDbgRecords();
     Inst->eraseFromParent();
   }
   return {NumDeadInst, NumDeadDbgInst};
@@ -3582,7 +3582,7 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
     if (I->isUsedByMetadata())
       dropDebugUsers(*I);
     // RemoveDIs: drop debug-info too as the following code does.
-    I->dropDbgValues();
+    I->dropDbgRecords();
     if (I->isDebugOrPseudoInst()) {
       // Remove DbgInfo and pseudo probe Intrinsics.
       II = I->eraseFromParent();
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index cec47810e044..8c6af7afa875 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -554,7 +554,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
         DbgIntrinsics.insert(makeHash(DII));
         // Until RemoveDIs supports dbg.declares in DPValue format, we'll need
         // to collect DPValues attached to any other debug intrinsics.
-        for (const DPValue &DPV : DPValue::filter(DII->getDbgValueRange()))
+        for (const DPValue &DPV : DPValue::filter(DII->getDbgRecordRange()))
           DbgIntrinsics.insert(makeHash(&DPV));
       } else {
         break;
@@ -564,7 +564,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
     // Build DPValue hashes for DPValues attached to the terminator, which isn't
     // considered in the loop above.
     for (const DPValue &DPV :
-         DPValue::filter(OrigPreheader->getTerminator()->getDbgValueRange()))
+         DPValue::filter(OrigPreheader->getTerminator()->getDbgRecordRange()))
       DbgIntrinsics.insert(makeHash(&DPV));
 
     // Remember the local noalias scope declarations in the header. After the
@@ -599,7 +599,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
     // (Stored as a range because it gives us a natural way of testing whether
     //  there were DPValues on the next instruction before we hoisted things).
     iterator_range<DPValue::self_iterator> NextDbgInsts =
-      (I != E) ? I->getDbgValueRange() : DPMarker::getEmptyDPValueRange();
+        (I != E) ? I->getDbgRecordRange() : DPMarker::getEmptyDbgRecordRange();
 
     while (I != E) {
       Instruction *Inst = &*I++;
@@ -636,7 +636,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
               DPV.eraseFromParent();
         }
 
-        NextDbgInsts = I->getDbgValueRange();
+        NextDbgInsts = I->getDbgRecordRange();
 
         Inst->moveBefore(LoopEntryBranch);
 
@@ -655,7 +655,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
         auto Range = C->cloneDebugInfoFrom(Inst, NextDbgInsts.begin());
         RemapDPValueRange(M, Range, ValueMap,
                           RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
-        NextDbgInsts = DPMarker::getEmptyDPValueRange();
+        NextDbgInsts = DPMarker::getEmptyDbgRecordRange();
         // Erase anything we've seen before.
         for (DPValue &DPV : make_early_inc_range(DPValue::filter(Range)))
           if (DbgIntrinsics.count(makeHash(&DPV)))
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 650f055356c0..ecd76b7c1fbf 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -917,7 +917,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
     for (Instruction &I : *BB) {
       RemapInstruction(&I, VMap,
                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
-      RemapDPValueRange(M, I.getDbgValueRange(), VMap,
+      RemapDPValueRange(M, I.getDbgRecordRange(), VMap,
                         RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
     }
   }
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 7491a99b03f6..05b02a42c58a 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -634,7 +634,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
         // RemoveDIs: do the same as below for DPValues.
         if (Block->IsNewDbgInfoFormat) {
           for (DPValue &DPV : llvm::make_early_inc_range(
-                   DPValue::filter(I.getDbgValueRange()))) {
+                   DPValue::filter(I.getDbgRecordRange()))) {
             DebugVariable Key(DPV.getVariable(), DPV.getExpression(),
                               DPV.getDebugLoc().get());
             if (!DeadDebugSet.insert(Key).second)
@@ -677,7 +677,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
     // repeatedly inserted before the first instruction. To replicate this
     // behaviour, do it backwards.
     for (DPValue *DPV : llvm::reverse(DeadDPValues))
-      ExitBlock->insertDPValueBefore(DPV, InsertDbgValueBefore);
+      ExitBlock->insertDbgRecordBefore(DPV, InsertDbgValueBefore);
   }
 
   // Remove the block from the reference counting scheme, so that we can
diff --git a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
index bfe474d82045..ed06d3e7f452 100644
--- a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
+++ b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
@@ -110,7 +110,7 @@ Instruction *getUntagLocationIfFunctionExit(Instruction &Inst) {
 
 void StackInfoBuilder::visit(Instruction &Inst) {
   // Visit non-intrinsic debug-info records attached to Inst.
-  for (DPValue &DPV : DPValue::filter(Inst.getDbgValueRange())) {
+  for (DPValue &DPV : DPValue::filter(Inst.getDbgRecordRange())) {
     auto AddIfInteresting = [&](Value *V) {
       if (auto *AI = dyn_cast_or_null<AllocaInst>(V)) {
         if (!isInterestingAlloca(*AI))
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 5b9a38c0b74e..0f3d1403481d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1535,7 +1535,7 @@ static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
 static void
 hoistLockstepIdenticalDPValues(Instruction *TI, Instruction *I1,
                                SmallVectorImpl<Instruction *> &OtherInsts) {
-  if (!I1->hasDbgValues())
+  if (!I1->hasDbgRecords())
     return;
   using CurrentAndEndIt =
       std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
@@ -1557,12 +1557,12 @@ hoistLockstepIdenticalDPValues(Instruction *TI, Instruction *I1,
 
   // Collect the iterators.
   Itrs.push_back(
-      {I1->getDbgValueRange().begin(), I1->getDbgValueRange().end()});
+      {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
   for (Instruction *Other : OtherInsts) {
-    if (!Other->hasDbgValues())
+    if (!Other->hasDbgRecords())
       return;
     Itrs.push_back(
-        {Other->getDbgValueRange().begin(), Other->getDbgValueRange().end()});
+        {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
   }
 
   // Iterate in lock-step until any of the DbgRecord lists are exausted. If
@@ -1576,7 +1576,7 @@ hoistLockstepIdenticalDPValues(Instruction *TI, Instruction *I1,
       DbgRecord &DR = *Pair.first++;
       if (HoistDPVs) {
         DR.removeFromParent();
-        TI->getParent()->insertDPValueBefore(&DR, TI->getIterator());
+        TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
       }
     }
   }
@@ -3207,10 +3207,10 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
   // instructions, in the same way that dbg.value intrinsics are dropped at the
   // end of this block.
   for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
-    for (DbgRecord &DR : make_early_inc_range(It.getDbgValueRange()))
+    for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
       // Drop all records except assign-kind DPValues (dbg.assign equivalent).
       if (DPValue *DPV = dyn_cast<DPValue>(&DR); !DPV || !DPV->isDbgAssign())
-        It.dropOneDbgValue(&DR);
+        It.dropOneDbgRecord(&DR);
   BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
              std::prev(ThenBB->end()));
 
@@ -3849,7 +3849,7 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
   if (PredBlock->IsNewDbgInfoFormat) {
     PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
     for (DPValue &DPV :
-         DPValue::filter(PredBlock->getTerminator()->getDbgValueRange())) {
+         DPValue::filter(PredBlock->getTerminator()->getDbgRecordRange())) {
       RemapDPValue(M, &DPV, VMap,
                    RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
     }
@@ -5308,7 +5308,7 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
 
   // Debug-info records on the unreachable inst itself should be deleted, as
   // below we delete everything past the final executable instruction.
-  UI->dropDbgValues();
+  UI->dropDbgRecords();
 
   // If there are any instructions immediately before the unreachable that can
   // be removed, do so.
@@ -5328,7 +5328,7 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
 
     // If we're deleting this, we're deleting any subsequent dbg.values, so
     // delete DPValue records of variable information.
-    BBI->dropDbgValues();
+    BBI->dropDbgRecords();
 
     // Delete this instruction (any uses are guaranteed to be dead)
     BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 91ab2795a4b9..3da161043d6c 100644
--- a/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -1066,7 +1066,7 @@ void Mapper::remapFunction(Function &F) {
   for (BasicBlock &BB : F) {
     for (Instruction &I : BB) {
       remapInstruction(&I);
-      for (DbgRecord &DR : I.getDbgValueRange())
+      for (DbgRecord &DR : I.getDbgRecordRange())
         remapDPValue(DR);
     }
   }
diff --git a/llvm/tools/llvm-reduce/deltas/ReduceDbgRecords.cpp b/llvm/tools/llvm-reduce/deltas/ReduceDbgRecords.cpp
index 94b12eb34cf6..2f3d4cac9fa0 100644
--- a/llvm/tools/llvm-reduce/deltas/ReduceDbgRecords.cpp
+++ b/llvm/tools/llvm-reduce/deltas/ReduceDbgRecords.cpp
@@ -29,7 +29,7 @@ static void extractDbgRecordsFromModule(Oracle &O, ReducerWorkItem &WorkItem) {
   for (auto &F : M)
     for (auto &BB : F)
       for (auto &I : BB)
-        for (DbgRecord &DR : llvm::make_early_inc_range(I.getDbgValueRange()))
+        for (DbgRecord &DR : llvm::make_early_inc_range(I.getDbgRecordRange()))
           if (!O.shouldKeep())
             DR.eraseFromParent();
 }
diff --git a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
index b773bffd7a03..e23c7eaa4930 100644
--- a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
+++ b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
@@ -80,9 +80,9 @@ TEST(BasicBlockDbgInfoTest, InsertAfterSelf) {
   Instruction *Inst1 = &*BB.begin();
   Instruction *Inst2 = &*std::next(BB.begin());
   Instruction *RetInst = &*std::next(Inst2->getIterator());
-  EXPECT_TRUE(Inst1->hasDbgValues());
-  EXPECT_TRUE(Inst2->hasDbgValues());
-  EXPECT_FALSE(RetInst->hasDbgValues());
+  EXPECT_TRUE(Inst1->hasDbgRecords());
+  EXPECT_TRUE(Inst2->hasDbgRecords());
+  EXPECT_FALSE(RetInst->hasDbgRecords());
 
   // If we move Inst2 to be after Inst1, then it comes _immediately_ after. Were
   // we in dbg.value form we would then have:
@@ -94,14 +94,14 @@ TEST(BasicBlockDbgInfoTest, InsertAfterSelf) {
   Inst2->moveAfter(Inst1);
 
   // Inst1 should only have one DPValue on it.
-  EXPECT_TRUE(Inst1->hasDbgValues());
-  auto Range1 = Inst1->getDbgValueRange();
+  EXPECT_TRUE(Inst1->hasDbgRecords());
+  auto Range1 = Inst1->getDbgRecordRange();
   EXPECT_EQ(std::distance(Range1.begin(), Range1.end()), 1u);
   // Inst2 should have none.
-  EXPECT_FALSE(Inst2->hasDbgValues());
+  EXPECT_FALSE(Inst2->hasDbgRecords());
   // While the return inst should now have one on it.
-  EXPECT_TRUE(RetInst->hasDbgValues());
-  auto Range2 = RetInst->getDbgValueRange();
+  EXPECT_TRUE(RetInst->hasDbgRecords());
+  auto Range2 = RetInst->getDbgRecordRange();
   EXPECT_EQ(std::distance(Range2.begin(), Range2.end()), 1u);
 
   M->convertFromNewDbgValues();
@@ -171,12 +171,12 @@ TEST(BasicBlockDbgInfoTest, MarkerOperations) {
   EXPECT_TRUE(Marker2->StoredDPValues.empty());
 
   // This should appear in Marker1.
-  BB.insertDPValueBefore(DPV1, BB.begin());
+  BB.insertDbgRecordBefore(DPV1, BB.begin());
   EXPECT_EQ(Marker1->StoredDPValues.size(), 1u);
   EXPECT_EQ(DPV1, &*Marker1->StoredDPValues.begin());
 
   // This should attach to Marker2.
-  BB.insertDPValueAfter(DPV2, &*BB.begin());
+  BB.insertDbgRecordAfter(DPV2, &*BB.begin());
   EXPECT_EQ(Marker2->StoredDPValues.size(), 1u);
   EXPECT_EQ(DPV2, &*Marker2->StoredDPValues.begin());
 
@@ -189,23 +189,23 @@ TEST(BasicBlockDbgInfoTest, MarkerOperations) {
   EXPECT_EQ(Marker2->StoredDPValues.size(), 2u);
   // They should also be in the correct order.
   SmallVector<DbgRecord *, 2> DPVs;
-  for (DbgRecord &DPV : Marker2->getDbgValueRange())
+  for (DbgRecord &DPV : Marker2->getDbgRecordRange())
     DPVs.push_back(&DPV);
   EXPECT_EQ(DPVs[0], DPV1);
   EXPECT_EQ(DPVs[1], DPV2);
 
   // If we remove the end instruction, the DPValues should fall down into
   // the trailing marker.
-  EXPECT_EQ(BB.getTrailingDPValues(), nullptr);
+  EXPECT_EQ(BB.getTrailingDbgRecords(), nullptr);
   Instr2->removeFromParent();
   EXPECT_TRUE(BB.empty());
-  EndMarker = BB.getTrailingDPValues();
+  EndMarker = BB.getTrailingDbgRecords();
   ASSERT_NE(EndMarker, nullptr);
   EXPECT_EQ(EndMarker->StoredDPValues.size(), 2u);
   // Again, these should arrive in the correct order.
 
   DPVs.clear();
-  for (DbgRecord &DPV : EndMarker->getDbgValueRange())
+  for (DbgRecord &DPV : EndMarker->getDbgRecordRange())
     DPVs.push_back(&DPV);
   EXPECT_EQ(DPVs[0], DPV1);
   EXPECT_EQ(DPVs[1], DPV2);
@@ -222,11 +222,11 @@ TEST(BasicBlockDbgInfoTest, MarkerOperations) {
   EXPECT_EQ(Instr1->DbgMarker->StoredDPValues.size(), 2u);
   // We should de-allocate the trailing marker when something is inserted
   // at end().
-  EXPECT_EQ(BB.getTrailingDPValues(), nullptr);
+  EXPECT_EQ(BB.getTrailingDbgRecords(), nullptr);
 
   // Remove Instr1: now the DPValues will fall down again,
   Instr1->removeFromParent();
-  EndMarker = BB.getTrailingDPValues();
+  EndMarker = BB.getTrailingDbgRecords();
   EXPECT_EQ(EndMarker->StoredDPValues.size(), 2u);
 
   // Inserting a terminator, however it's intended, should dislodge the
@@ -235,7 +235,7 @@ TEST(BasicBlockDbgInfoTest, MarkerOperations) {
   // end forever.
   Instr2->insertBefore(BB, BB.begin());
   EXPECT_EQ(Instr2->DbgMarker->StoredDPValues.size(), 2u);
-  EXPECT_EQ(BB.getTrailingDPValues(), nullptr);
+  EXPECT_EQ(BB.getTrailingDbgRecords(), nullptr);
 
   // Teardown,
   Instr1->insertBefore(BB, BB.begin());
@@ -393,7 +393,7 @@ TEST(BasicBlockDbgInfoTest, InstrDbgAccess) {
   ASSERT_EQ(CInst->DbgMarker->StoredDPValues.size(), 1u);
   DbgRecord *DPV1 = &*CInst->DbgMarker->StoredDPValues.begin();
   ASSERT_TRUE(DPV1);
-  EXPECT_FALSE(BInst->hasDbgValues());
+  EXPECT_FALSE(BInst->hasDbgRecords());
 
   // Clone DPValues from one inst to another. Other arguments to clone are
   // tested in DPMarker test.
@@ -405,23 +405,23 @@ TEST(BasicBlockDbgInfoTest, InstrDbgAccess) {
   EXPECT_NE(DPV1, DPV2);
 
   // We should be able to get a range over exactly the same information.
-  auto Range2 = BInst->getDbgValueRange();
+  auto Range2 = BInst->getDbgRecordRange();
   EXPECT_EQ(Range1.begin(), Range2.begin());
   EXPECT_EQ(Range1.end(), Range2.end());
 
   // We should be able to query if there are DPValues,
-  EXPECT_TRUE(BInst->hasDbgValues());
-  EXPECT_TRUE(CInst->hasDbgValues());
-  EXPECT_FALSE(DInst->hasDbgValues());
+  EXPECT_TRUE(BInst->hasDbgRecords());
+  EXPECT_TRUE(CInst->hasDbgRecords());
+  EXPECT_FALSE(DInst->hasDbgRecords());
 
   // Dropping should be easy,
-  BInst->dropDbgValues();
-  EXPECT_FALSE(BInst->hasDbgValues());
+  BInst->dropDbgRecords();
+  EXPECT_FALSE(BInst->hasDbgRecords());
   EXPECT_EQ(BInst->DbgMarker->StoredDPValues.size(), 0u);
 
   // And we should be able to drop individual DPValues.
-  CInst->dropOneDbgValue(DPV1);
-  EXPECT_FALSE(CInst->hasDbgValues());
+  CInst->dropOneDbgRecord(DPV1);
+  EXPECT_FALSE(CInst->hasDbgRecords());
   EXPECT_EQ(CInst->DbgMarker->StoredDPValues.size(), 0u);
 
   UseNewDbgInfoFormat = false;
@@ -539,7 +539,7 @@ protected:
   void TearDown() override { UseNewDbgInfoFormat = false; }
 
   bool InstContainsDPValue(Instruction *I, DPValue *DPV) {
-    for (DbgRecord &D : I->getDbgValueRange()) {
+    for (DbgRecord &D : I->getDbgRecordRange()) {
       if (&D == DPV) {
         // Confirm too that the links between the records are correct.
         EXPECT_EQ(DPV->Marker, I->DbgMarker);
@@ -552,7 +552,7 @@ protected:
 
   bool CheckDPVOrder(Instruction *I, SmallVector<DPValue *> CheckVals) {
     SmallVector<DbgRecord *> Vals;
-    for (DbgRecord &D : I->getDbgValueRange())
+    for (DbgRecord &D : I->getDbgRecordRange())
       Vals.push_back(&D);
 
     EXPECT_EQ(Vals.size(), CheckVals.size());
@@ -1161,7 +1161,7 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceTrailing) {
 
   // Begin by forcing entry block to have dangling DPValue.
   Entry.getTerminator()->eraseFromParent();
-  ASSERT_NE(Entry.getTrailingDPValues(), nullptr);
+  ASSERT_NE(Entry.getTrailingDbgRecords(), nullptr);
   EXPECT_TRUE(Entry.empty());
 
   // Now transfer the entire contents of the exit block into the entry.
@@ -1222,12 +1222,12 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsert) {
   ASSERT_TRUE(isa<ReturnInst>(RetInst));
 
   // add and sub should both have one DPValue on add and ret.
-  EXPECT_FALSE(SubInst->hasDbgValues());
-  EXPECT_TRUE(AddInst->hasDbgValues());
-  EXPECT_TRUE(RetInst->hasDbgValues());
-  auto R1 = AddInst->getDbgValueRange();
+  EXPECT_FALSE(SubInst->hasDbgRecords());
+  EXPECT_TRUE(AddInst->hasDbgRecords());
+  EXPECT_TRUE(RetInst->hasDbgRecords());
+  auto R1 = AddInst->getDbgRecordRange();
   EXPECT_EQ(std::distance(R1.begin(), R1.end()), 1u);
-  auto R2 = RetInst->getDbgValueRange();
+  auto R2 = RetInst->getDbgRecordRange();
   EXPECT_EQ(std::distance(R2.begin(), R2.end()), 1u);
 
   // The Supported (TM) code sequence for removing then reinserting insts
@@ -1239,19 +1239,19 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsert) {
   // We should have a re-insertion position.
   ASSERT_TRUE(Pos);
   // Both DPValues should now be attached to the ret inst.
-  auto R3 = RetInst->getDbgValueRange();
+  auto R3 = RetInst->getDbgRecordRange();
   EXPECT_EQ(std::distance(R3.begin(), R3.end()), 2u);
 
   // Re-insert and re-insert.
   AddInst->insertAfter(SubInst);
-  Entry.reinsertInstInDPValues(AddInst, Pos);
+  Entry.reinsertInstInDbgRecords(AddInst, Pos);
   // We should be back into a position of having one DPValue on add and ret.
-  EXPECT_FALSE(SubInst->hasDbgValues());
-  EXPECT_TRUE(AddInst->hasDbgValues());
-  EXPECT_TRUE(RetInst->hasDbgValues());
-  auto R4 = AddInst->getDbgValueRange();
+  EXPECT_FALSE(SubInst->hasDbgRecords());
+  EXPECT_TRUE(AddInst->hasDbgRecords());
+  EXPECT_TRUE(RetInst->hasDbgRecords());
+  auto R4 = AddInst->getDbgRecordRange();
   EXPECT_EQ(std::distance(R4.begin(), R4.end()), 1u);
-  auto R5 = RetInst->getDbgValueRange();
+  auto R5 = RetInst->getDbgRecordRange();
   EXPECT_EQ(std::distance(R5.begin(), R5.end()), 1u);
 
   UseNewDbgInfoFormat = false;
@@ -1300,10 +1300,10 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDPValue) {
   ASSERT_TRUE(isa<ReturnInst>(RetInst));
 
   // There should be one DPValue.
-  EXPECT_FALSE(SubInst->hasDbgValues());
-  EXPECT_TRUE(AddInst->hasDbgValues());
-  EXPECT_FALSE(RetInst->hasDbgValues());
-  auto R1 = AddInst->getDbgValueRange();
+  EXPECT_FALSE(SubInst->hasDbgRecords());
+  EXPECT_TRUE(AddInst->hasDbgRecords());
+  EXPECT_FALSE(RetInst->hasDbgRecords());
+  auto R1 = AddInst->getDbgRecordRange();
   EXPECT_EQ(std::distance(R1.begin(), R1.end()), 1u);
 
   // The Supported (TM) code sequence for removing then reinserting insts:
@@ -1314,18 +1314,18 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDPValue) {
   // No re-insertion position as there were no DPValues on the ret.
   ASSERT_FALSE(Pos);
   // The single DPValue should now be attached to the ret inst.
-  EXPECT_TRUE(RetInst->hasDbgValues());
-  auto R2 = RetInst->getDbgValueRange();
+  EXPECT_TRUE(RetInst->hasDbgRecords());
+  auto R2 = RetInst->getDbgRecordRange();
   EXPECT_EQ(std::distance(R2.begin(), R2.end()), 1u);
 
   // Re-insert and re-insert.
   AddInst->insertAfter(SubInst);
-  Entry.reinsertInstInDPValues(AddInst, Pos);
+  Entry.reinsertInstInDbgRecords(AddInst, Pos);
   // We should be back into a position of having one DPValue on the AddInst.
-  EXPECT_FALSE(SubInst->hasDbgValues());
-  EXPECT_TRUE(AddInst->hasDbgValues());
-  EXPECT_FALSE(RetInst->hasDbgValues());
-  auto R3 = AddInst->getDbgValueRange();
+  EXPECT_FALSE(SubInst->hasDbgRecords());
+  EXPECT_TRUE(AddInst->hasDbgRecords());
+  EXPECT_FALSE(RetInst->hasDbgRecords());
+  auto R3 = AddInst->getDbgRecordRange();
   EXPECT_EQ(std::distance(R3.begin(), R3.end()), 1u);
 
   UseNewDbgInfoFormat = false;
@@ -1376,7 +1376,7 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty1) {
 
   // Begin by forcing entry block to have dangling DPValue.
   Entry.getTerminator()->eraseFromParent();
-  ASSERT_NE(Entry.getTrailingDPValues(), nullptr);
+  ASSERT_NE(Entry.getTrailingDbgRecords(), nullptr);
   EXPECT_TRUE(Entry.empty());
 
   // Now transfer the entire contents of the exit block into the entry. This
@@ -1386,10 +1386,10 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty1) {
   // We should now have two dbg.values on the first instruction, and they
   // should be in the correct order of %a, then 0.
   Instruction *BInst = &*Entry.begin();
-  ASSERT_TRUE(BInst->hasDbgValues());
+  ASSERT_TRUE(BInst->hasDbgRecords());
   EXPECT_EQ(BInst->DbgMarker->StoredDPValues.size(), 2u);
   SmallVector<DPValue *, 2> DPValues;
-  for (DbgRecord &DPV : BInst->getDbgValueRange())
+  for (DbgRecord &DPV : BInst->getDbgRecordRange())
     DPValues.push_back(cast<DPValue>(&DPV));
 
   EXPECT_EQ(DPValues[0]->getVariableLocationOp(0), F.getArg(0));
@@ -1398,7 +1398,7 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty1) {
   EXPECT_EQ(cast<ConstantInt>(SecondDPVValue)->getZExtValue(), 0ull);
 
   // No trailing DPValues in the entry block now.
-  EXPECT_EQ(Entry.getTrailingDPValues(), nullptr);
+  EXPECT_EQ(Entry.getTrailingDbgRecords(), nullptr);
 
   UseNewDbgInfoFormat = false;
 }
@@ -1446,7 +1446,7 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty2) {
 
   // Begin by forcing entry block to have dangling DPValue.
   Entry.getTerminator()->eraseFromParent();
-  ASSERT_NE(Entry.getTrailingDPValues(), nullptr);
+  ASSERT_NE(Entry.getTrailingDbgRecords(), nullptr);
   EXPECT_TRUE(Entry.empty());
 
   // Now transfer into the entry block -- fetching the first instruction with
@@ -1456,23 +1456,23 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty2) {
 
   // We should now have one dbg.values on the first instruction, %a.
   Instruction *BInst = &*Entry.begin();
-  ASSERT_TRUE(BInst->hasDbgValues());
+  ASSERT_TRUE(BInst->hasDbgRecords());
   EXPECT_EQ(BInst->DbgMarker->StoredDPValues.size(), 1u);
   SmallVector<DPValue *, 2> DPValues;
-  for (DbgRecord &DPV : BInst->getDbgValueRange())
+  for (DbgRecord &DPV : BInst->getDbgRecordRange())
     DPValues.push_back(cast<DPValue>(&DPV));
 
   EXPECT_EQ(DPValues[0]->getVariableLocationOp(0), F.getArg(0));
   // No trailing DPValues in the entry block now.
-  EXPECT_EQ(Entry.getTrailingDPValues(), nullptr);
+  EXPECT_EQ(Entry.getTrailingDbgRecords(), nullptr);
 
   // We should have nothing left in the exit block...
   EXPECT_TRUE(Exit.empty());
   // ... except for some dangling DPValues.
-  EXPECT_NE(Exit.getTrailingDPValues(), nullptr);
-  EXPECT_FALSE(Exit.getTrailingDPValues()->empty());
-  Exit.getTrailingDPValues()->eraseFromParent();
-  Exit.deleteTrailingDPValues();
+  EXPECT_NE(Exit.getTrailingDbgRecords(), nullptr);
+  EXPECT_FALSE(Exit.getTrailingDbgRecords()->empty());
+  Exit.getTrailingDbgRecords()->eraseFromParent();
+  Exit.deleteTrailingDbgRecords();
 
   UseNewDbgInfoFormat = false;
 }
@@ -1517,14 +1517,14 @@ TEST(BasicBlockDbgInfoTest, DbgMoveToEnd) {
   // Move the return to the end of the entry block.
   Instruction *Br = Entry.getTerminator();
   Instruction *Ret = Exit.getTerminator();
-  EXPECT_EQ(Entry.getTrailingDPValues(), nullptr);
+  EXPECT_EQ(Entry.getTrailingDbgRecords(), nullptr);
   Ret->moveBefore(Entry, Entry.end());
   Br->eraseFromParent();
 
   // There should continue to not be any debug-info anywhere.
-  EXPECT_EQ(Entry.getTrailingDPValues(), nullptr);
-  EXPECT_EQ(Exit.getTrailingDPValues(), nullptr);
-  EXPECT_FALSE(Ret->hasDbgValues());
+  EXPECT_EQ(Entry.getTrailingDbgRecords(), nullptr);
+  EXPECT_EQ(Exit.getTrailingDbgRecords(), nullptr);
+  EXPECT_FALSE(Ret->hasDbgRecords());
 
   UseNewDbgInfoFormat = false;
 }
diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp
index c99f928de8a9..0b019c26148b 100644
--- a/llvm/unittests/IR/DebugInfoTest.cpp
+++ b/llvm/unittests/IR/DebugInfoTest.cpp
@@ -952,10 +952,10 @@ TEST(MetadataTest, ConvertDbgToDPValue) {
   ExitBlock->createMarker(RetInst);
 
   // Insert DPValues into markers, order should come out DPV2, DPV1.
-  FirstInst->DbgMarker->insertDPValue(DPV1, false);
-  FirstInst->DbgMarker->insertDPValue(DPV2, true);
+  FirstInst->DbgMarker->insertDbgRecord(DPV1, false);
+  FirstInst->DbgMarker->insertDbgRecord(DPV2, true);
   unsigned int ItCount = 0;
-  for (DbgRecord &Item : FirstInst->DbgMarker->getDbgValueRange()) {
+  for (DbgRecord &Item : FirstInst->DbgMarker->getDbgRecordRange()) {
     EXPECT_TRUE((&Item == DPV2 && ItCount == 0) ||
               (&Item == DPV1 && ItCount == 1));
     EXPECT_EQ(Item.getMarker(), FirstInst->DbgMarker);
@@ -969,7 +969,7 @@ TEST(MetadataTest, ConvertDbgToDPValue) {
   // Check these things store the same information; but that they're not the same
   // objects.
   for (DPValue &Item :
-       DPValue::filter(RetInst->DbgMarker->getDbgValueRange())) {
+       DPValue::filter(RetInst->DbgMarker->getDbgRecordRange())) {
     EXPECT_TRUE((Item.getRawLocation() == DPV2->getRawLocation() && ItCount == 0) ||
                 (Item.getRawLocation() == DPV1->getRawLocation() && ItCount == 1));
 
@@ -979,11 +979,11 @@ TEST(MetadataTest, ConvertDbgToDPValue) {
     ++ItCount;
   }
 
-  RetInst->DbgMarker->dropDbgValues();
+  RetInst->DbgMarker->dropDbgRecords();
   EXPECT_EQ(RetInst->DbgMarker->StoredDPValues.size(), 0u);
 
   // Try cloning one single DPValue.
-  auto DIIt = std::next(FirstInst->DbgMarker->getDbgValueRange().begin());
+  auto DIIt = std::next(FirstInst->DbgMarker->getDbgRecordRange().begin());
   RetInst->DbgMarker->cloneDebugInfoFrom(FirstInst->DbgMarker, DIIt, false);
   EXPECT_EQ(RetInst->DbgMarker->StoredDPValues.size(), 1u);
   // The second DPValue should have been cloned; it should have the same values
@@ -992,7 +992,7 @@ TEST(MetadataTest, ConvertDbgToDPValue) {
                 ->getRawLocation(),
             DPV1->getRawLocation());
   // We should be able to drop individual DPValues.
-  RetInst->DbgMarker->dropOneDbgValue(
+  RetInst->DbgMarker->dropOneDbgRecord(
       &*RetInst->DbgMarker->StoredDPValues.begin());
 
   // "Aborb" a DPMarker: this means pretend that the instruction it's attached
@@ -1001,7 +1001,7 @@ TEST(MetadataTest, ConvertDbgToDPValue) {
   EXPECT_EQ(RetInst->DbgMarker->StoredDPValues.size(), 2u);
   // Should be the DPV1 and DPV2 objects.
   ItCount = 0;
-  for (DbgRecord &Item : RetInst->DbgMarker->getDbgValueRange()) {
+  for (DbgRecord &Item : RetInst->DbgMarker->getDbgRecordRange()) {
     EXPECT_TRUE((&Item == DPV2 && ItCount == 0) ||
               (&Item == DPV1 && ItCount == 1));
     EXPECT_EQ(Item.getMarker(), RetInst->DbgMarker);
@@ -1017,12 +1017,12 @@ TEST(MetadataTest, ConvertDbgToDPValue) {
   RetInst->DbgMarker->removeMarker();
   RetInst->eraseFromParent();
 
-  DPMarker *EndMarker = ExitBlock->getTrailingDPValues();
+  DPMarker *EndMarker = ExitBlock->getTrailingDbgRecords();
   ASSERT_NE(EndMarker, nullptr);
   EXPECT_EQ(EndMarker->StoredDPValues.size(), 2u);
   // Test again that it's those two DPValues, DPV1 and DPV2.
   ItCount = 0;
-  for (DbgRecord &Item : EndMarker->getDbgValueRange()) {
+  for (DbgRecord &Item : EndMarker->getDbgRecordRange()) {
     EXPECT_TRUE((&Item == DPV2 && ItCount == 0) ||
               (&Item == DPV1 && ItCount == 1));
     EXPECT_EQ(Item.getMarker(), EndMarker);
@@ -1034,7 +1034,7 @@ TEST(MetadataTest, ConvertDbgToDPValue) {
 
   // The record of those trailing DPValues would dangle and cause an assertion
   // failure if it lived until the end of the LLVMContext.
-  ExitBlock->deleteTrailingDPValues();
+  ExitBlock->deleteTrailingDbgRecords();
 }
 
 TEST(MetadataTest, DPValueConversionRoutines) {
@@ -1117,14 +1117,14 @@ TEST(MetadataTest, DPValueConversionRoutines) {
 
   EXPECT_EQ(FirstInst->DbgMarker->StoredDPValues.size(), 1u);
   DPValue *DPV1 =
-      cast<DPValue>(&*FirstInst->DbgMarker->getDbgValueRange().begin());
+      cast<DPValue>(&*FirstInst->DbgMarker->getDbgRecordRange().begin());
   EXPECT_EQ(DPV1->getMarker(), FirstInst->DbgMarker);
   // Should point at %a, an argument.
   EXPECT_TRUE(isa<Argument>(DPV1->getVariableLocationOp(0)));
 
   EXPECT_EQ(SecondInst->DbgMarker->StoredDPValues.size(), 1u);
   DPValue *DPV2 =
-      cast<DPValue>(&*SecondInst->DbgMarker->getDbgValueRange().begin());
+      cast<DPValue>(&*SecondInst->DbgMarker->getDbgRecordRange().begin());
   EXPECT_EQ(DPV2->getMarker(), SecondInst->DbgMarker);
   // Should point at FirstInst.
   EXPECT_EQ(DPV2->getVariableLocationOp(0), FirstInst);
diff --git a/llvm/unittests/IR/IRBuilderTest.cpp b/llvm/unittests/IR/IRBuilderTest.cpp
index cece65974c01..139e8832c97b 100644
--- a/llvm/unittests/IR/IRBuilderTest.cpp
+++ b/llvm/unittests/IR/IRBuilderTest.cpp
@@ -872,15 +872,15 @@ TEST_F(IRBuilderTest, createFunction) {
 
 TEST_F(IRBuilderTest, DIBuilder) {
   auto GetLastDbgRecord = [](const Instruction *I) -> DbgRecord * {
-    if (I->getDbgValueRange().empty())
+    if (I->getDbgRecordRange().empty())
       return nullptr;
-    return &*std::prev(I->getDbgValueRange().end());
+    return &*std::prev(I->getDbgRecordRange().end());
   };
 
   auto ExpectOrder = [&](DbgInstPtr First, BasicBlock::iterator Second) {
     if (M->IsNewDbgInfoFormat) {
       EXPECT_TRUE(First.is<DbgRecord *>());
-      EXPECT_FALSE(Second->getDbgValueRange().empty());
+      EXPECT_FALSE(Second->getDbgRecordRange().empty());
       EXPECT_EQ(GetLastDbgRecord(&*Second), First.get<DbgRecord *>());
     } else {
       EXPECT_TRUE(First.is<Instruction *>());
@@ -951,7 +951,7 @@ TEST_F(IRBuilderTest, DIBuilder) {
           I, VarX, DIB.createExpression(), VarLoc, BB);
       I = Builder.CreateAlloca(Builder.getInt32Ty());
       ExpectOrder(VarXValue, I->getIterator());
-      EXPECT_EQ(BB->getTrailingDPValues(), nullptr);
+      EXPECT_EQ(BB->getTrailingDbgRecords(), nullptr);
     }
     { /* dbg.declare | DPValue::Declare */
       ExpectOrder(DIB.insertDeclare(I, VarY, DIB.createExpression(), VarLoc, I),
@@ -961,7 +961,7 @@ TEST_F(IRBuilderTest, DIBuilder) {
           DIB.insertDeclare(I, VarY, DIB.createExpression(), VarLoc, BB);
       I = Builder.CreateAlloca(Builder.getInt32Ty());
       ExpectOrder(VarYDeclare, I->getIterator());
-      EXPECT_EQ(BB->getTrailingDPValues(), nullptr);
+      EXPECT_EQ(BB->getTrailingDbgRecords(), nullptr);
     }
     { /* dbg.assign | DPValue::Assign */
       I = Builder.CreateAlloca(Builder.getInt32Ty());
@@ -974,7 +974,7 @@ TEST_F(IRBuilderTest, DIBuilder) {
                               DIB.createExpression(), VarLoc);
       I = Builder.CreateAlloca(Builder.getInt32Ty());
       ExpectOrder(VarXAssign, I->getIterator());
-      EXPECT_EQ(BB->getTrailingDPValues(), nullptr);
+      EXPECT_EQ(BB->getTrailingDbgRecords(), nullptr);
     }
 
     Builder.CreateRet(nullptr);
diff --git a/llvm/unittests/IR/ValueTest.cpp b/llvm/unittests/IR/ValueTest.cpp
index 6146719bb296..97c8fea3c6db 100644
--- a/llvm/unittests/IR/ValueTest.cpp
+++ b/llvm/unittests/IR/ValueTest.cpp
@@ -376,11 +376,11 @@ TEST(ValueTest, replaceUsesOutsideBlockDPValue) {
   BasicBlock *Exit = GetNext(Entry);
   Instruction *Ret = &Exit->front();
 
-  EXPECT_TRUE(Branch->hasDbgValues());
-  EXPECT_TRUE(Ret->hasDbgValues());
+  EXPECT_TRUE(Branch->hasDbgRecords());
+  EXPECT_TRUE(Ret->hasDbgRecords());
 
-  DPValue *DPV1 = cast<DPValue>(&*Branch->getDbgValueRange().begin());
-  DPValue *DPV2 = cast<DPValue>(&*Ret->getDbgValueRange().begin());
+  DPValue *DPV1 = cast<DPValue>(&*Branch->getDbgRecordRange().begin());
+  DPValue *DPV2 = cast<DPValue>(&*Ret->getDbgRecordRange().begin());
 
   A->replaceUsesOutsideBlock(B, Entry);
   // These users are in Entry so shouldn't be changed.
diff --git a/llvm/unittests/Transforms/Utils/DebugifyTest.cpp b/llvm/unittests/Transforms/Utils/DebugifyTest.cpp
index 1ec9402b8aa9..89fa1334b427 100644
--- a/llvm/unittests/Transforms/Utils/DebugifyTest.cpp
+++ b/llvm/unittests/Transforms/Utils/DebugifyTest.cpp
@@ -61,7 +61,7 @@ struct DebugValueDrop : public FunctionPass {
         if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
           Dbgs.push_back(DVI);
         // If there are any non-intrinsic records (DPValues), drop those too.
-        I.dropDbgValues();
+        I.dropDbgRecords();
       }
     }
 
diff --git a/llvm/unittests/Transforms/Utils/LocalTest.cpp b/llvm/unittests/Transforms/Utils/LocalTest.cpp
index 822577410457..87a2a2ae4700 100644
--- a/llvm/unittests/Transforms/Utils/LocalTest.cpp
+++ b/llvm/unittests/Transforms/Utils/LocalTest.cpp
@@ -1327,7 +1327,7 @@ TEST(Local, ReplaceDPValue) {
   RetInst->DbgMarker = new DPMarker();
   RetInst->DbgMarker->MarkedInstr = RetInst;
   DPValue *DPV = new DPValue(DVI);
-  RetInst->DbgMarker->insertDPValue(DPV, false);
+  RetInst->DbgMarker->insertDbgRecord(DPV, false);
   // ... and erase the dbg.value.
   DVI->eraseFromParent();
 
-- 
cgit v1.2.3


From 0fe271c35368a9190661bcca87101fc0916d6a8b Mon Sep 17 00:00:00 2001
From: Fanbo Meng <fanbo.meng@ibm.com>
Date: Tue, 12 Mar 2024 10:56:51 -0400
Subject: [SystemZ][z/OS] Add missing include header to AutoConvert.cpp to fix
 build (#84909)

ba13fa2a5d57581bff1a7e9322234af30f4882f6
added usages of `errnoAsErrorCode()` to AutoConvert.cpp, need to include
Error.h header to fix build failure.
---
 llvm/lib/Support/AutoConvert.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Support/AutoConvert.cpp b/llvm/lib/Support/AutoConvert.cpp
index 74842e9167bd..c509284ee916 100644
--- a/llvm/lib/Support/AutoConvert.cpp
+++ b/llvm/lib/Support/AutoConvert.cpp
@@ -14,6 +14,7 @@
 #ifdef __MVS__
 
 #include "llvm/Support/AutoConvert.h"
+#include "llvm/Support/Error.h"
 #include <cassert>
 #include <fcntl.h>
 #include <sys/stat.h>
-- 
cgit v1.2.3


From f32b04d4ea91ad1018c25a1d4178cc4392d34968 Mon Sep 17 00:00:00 2001
From: NagyDonat <donat.nagy@ericsson.com>
Date: Tue, 12 Mar 2024 16:01:04 +0100
Subject: Revert "[analyzer] Accept C library functions from the `std`
 namespace" (#84926)

Reverts llvm/llvm-project#84469 because it causes buildbot failures.
I'll examine them and re-submit the change.
---
 .../Core/PathSensitive/CallDescription.h           |  8 +-
 clang/lib/StaticAnalyzer/Core/CheckerContext.cpp   |  8 +-
 clang/unittests/StaticAnalyzer/CMakeLists.txt      |  1 -
 .../StaticAnalyzer/IsCLibraryFunctionTest.cpp      | 89 ----------------------
 .../clang/unittests/StaticAnalyzer/BUILD.gn        |  1 -
 5 files changed, 9 insertions(+), 98 deletions(-)
 delete mode 100644 clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
index b4e1636130ca..3432d2648633 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
@@ -41,8 +41,12 @@ public:
     ///  - We also accept calls where the number of arguments or parameters is
     ///    greater than the specified value.
     /// For the exact heuristics, see CheckerContext::isCLibraryFunction().
-    /// (This mode only matches functions that are declared either directly
-    /// within a TU or in the namespace `std`.)
+    /// Note that functions whose declaration context is not a TU (e.g.
+    /// methods, functions in namespaces) are not accepted as C library
+    /// functions.
+    /// FIXME: If I understand it correctly, this discards calls where C++ code
+    /// refers a C library function through the namespace `std::` via headers
+    /// like <cstdlib>.
     CLibrary,
 
     /// Matches "simple" functions that are not methods. (Static methods are
diff --git a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
index 1a9bff529e9b..d6d4cec9dd3d 100644
--- a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
@@ -87,11 +87,9 @@ bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD,
   if (!II)
     return false;
 
-  // C library functions are either declared directly within a TU (the common
-  // case) or they are accessed through the namespace `std` (when they are used
-  // in C++ via headers like <cstdlib>).
-  const DeclContext *DC = FD->getDeclContext()->getRedeclContext();
-  if (!(DC->isTranslationUnit() || DC->isStdNamespace()))
+  // Look through 'extern "C"' and anything similar invented in the future.
+  // If this function is not in TU directly, it is not a C library function.
+  if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit())
     return false;
 
   // If this function is not externally visible, it is not a C library function.
diff --git a/clang/unittests/StaticAnalyzer/CMakeLists.txt b/clang/unittests/StaticAnalyzer/CMakeLists.txt
index db56e77331b8..775f0f8486b8 100644
--- a/clang/unittests/StaticAnalyzer/CMakeLists.txt
+++ b/clang/unittests/StaticAnalyzer/CMakeLists.txt
@@ -11,7 +11,6 @@ add_clang_unittest(StaticAnalysisTests
   CallEventTest.cpp
   ConflictingEvalCallsTest.cpp
   FalsePositiveRefutationBRVisitorTest.cpp
-  IsCLibraryFunctionTest.cpp
   NoStateChangeFuncVisitorTest.cpp
   ParamRegionTest.cpp
   RangeSetTest.cpp
diff --git a/clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp b/clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp
deleted file mode 100644
index 19c66cc6bee1..000000000000
--- a/clang/unittests/StaticAnalyzer/IsCLibraryFunctionTest.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-#include "clang/ASTMatchers/ASTMatchFinder.h"
-#include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Analysis/AnalysisDeclContext.h"
-#include "clang/Frontend/ASTUnit.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
-#include "clang/Tooling/Tooling.h"
-#include "gtest/gtest.h"
-
-#include <memory>
-
-using namespace clang;
-using namespace ento;
-using namespace ast_matchers;
-
-testing::AssertionResult extractFunctionDecl(StringRef Code,
-                                             const FunctionDecl *&Result) {
-  auto ASTUnit = tooling::buildASTFromCode(Code);
-  if (!ASTUnit)
-    return testing::AssertionFailure() << "AST construction failed";
-
-  ASTContext &Context = ASTUnit->getASTContext();
-  if (Context.getDiagnostics().hasErrorOccurred())
-    return testing::AssertionFailure() << "Compilation error";
-
-  auto Matches = ast_matchers::match(functionDecl().bind("fn"), Context);
-  if (Matches.empty())
-    return testing::AssertionFailure() << "No function declaration found";
-
-  if (Matches.size() > 1)
-    return testing::AssertionFailure()
-           << "Multiple function declarations found";
-
-  Result = Matches[0].getNodeAs<FunctionDecl>("fn");
-  return testing::AssertionSuccess();
-}
-
-TEST(IsCLibraryFunctionTest, AcceptsGlobal) {
-  const FunctionDecl *Result;
-  ASSERT_TRUE(extractFunctionDecl(R"cpp(void fun();)cpp", Result));
-  EXPECT_TRUE(CheckerContext::isCLibraryFunction(Result));
-}
-
-TEST(IsCLibraryFunctionTest, AcceptsExternCGlobal) {
-  const FunctionDecl *Result;
-  ASSERT_TRUE(
-      extractFunctionDecl(R"cpp(extern "C" { void fun(); })cpp", Result));
-  EXPECT_TRUE(CheckerContext::isCLibraryFunction(Result));
-}
-
-TEST(IsCLibraryFunctionTest, RejectsNoInlineNoExternalLinkage) {
-  // Functions that are neither inlined nor externally visible cannot be C library functions.
-  const FunctionDecl *Result;
-  ASSERT_TRUE(extractFunctionDecl(R"cpp(static void fun();)cpp", Result));
-  EXPECT_FALSE(CheckerContext::isCLibraryFunction(Result));
-}
-
-TEST(IsCLibraryFunctionTest, RejectsAnonymousNamespace) {
-  const FunctionDecl *Result;
-  ASSERT_TRUE(
-      extractFunctionDecl(R"cpp(namespace { void fun(); })cpp", Result));
-  EXPECT_FALSE(CheckerContext::isCLibraryFunction(Result));
-}
-
-TEST(IsCLibraryFunctionTest, AcceptsStdNamespace) {
-  const FunctionDecl *Result;
-  ASSERT_TRUE(
-      extractFunctionDecl(R"cpp(namespace std { void fun(); })cpp", Result));
-  EXPECT_TRUE(CheckerContext::isCLibraryFunction(Result));
-}
-
-TEST(IsCLibraryFunctionTest, RejectsOtherNamespaces) {
-  const FunctionDecl *Result;
-  ASSERT_TRUE(
-      extractFunctionDecl(R"cpp(namespace stdx { void fun(); })cpp", Result));
-  EXPECT_FALSE(CheckerContext::isCLibraryFunction(Result));
-}
-
-TEST(IsCLibraryFunctionTest, RejectsClassStatic) {
-  const FunctionDecl *Result;
-  ASSERT_TRUE(
-      extractFunctionDecl(R"cpp(class A { static void fun(); };)cpp", Result));
-  EXPECT_FALSE(CheckerContext::isCLibraryFunction(Result));
-}
-
-TEST(IsCLibraryFunctionTest, RejectsClassMember) {
-  const FunctionDecl *Result;
-  ASSERT_TRUE(extractFunctionDecl(R"cpp(class A { void fun(); };)cpp", Result));
-  EXPECT_FALSE(CheckerContext::isCLibraryFunction(Result));
-}
diff --git a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn
index 9c240cff1816..01c2b6ced336 100644
--- a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn
@@ -19,7 +19,6 @@ unittest("StaticAnalysisTests") {
     "CallEventTest.cpp",
     "ConflictingEvalCallsTest.cpp",
     "FalsePositiveRefutationBRVisitorTest.cpp",
-    "IsCLibraryFunctionTest.cpp",
     "NoStateChangeFuncVisitorTest.cpp",
     "ParamRegionTest.cpp",
     "RangeSetTest.cpp",
-- 
cgit v1.2.3


From 083da46ff07170471f8bb9ed2947f6ebc725670b Mon Sep 17 00:00:00 2001
From: Ben Langmuir <blangmuir@apple.com>
Date: Tue, 12 Mar 2024 08:02:54 -0700
Subject: [clang][deps] Fix dependency scanning with -working-directory
 (#84525)

Stop overriding -working-directory to CWD during argument parsing, which
should no longer necessary after we set the VFS working directory, and
set FSOpts correctly after parsing arguments so that working-directory
behaves correctly.
---
 .../DependencyScanningWorker.cpp                   | 14 +++++-----
 .../test/ClangScanDeps/working-directory-option.c  | 30 ++++++++++++++++++++++
 2 files changed, 37 insertions(+), 7 deletions(-)
 create mode 100644 clang/test/ClangScanDeps/working-directory-option.c

diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 2b882f8a5e07..76f3d950a13b 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -296,7 +296,7 @@ public:
         DisableFree(DisableFree), ModuleName(ModuleName) {}
 
   bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
-                     FileManager *FileMgr,
+                     FileManager *DriverFileMgr,
                      std::shared_ptr<PCHContainerOperations> PCHContainerOps,
                      DiagnosticConsumer *DiagConsumer) override {
     // Make a deep copy of the original Clang invocation.
@@ -342,12 +342,13 @@ public:
     ScanInstance.getHeaderSearchOpts().ModulesIncludeVFSUsage =
         any(OptimizeArgs & ScanningOptimizations::VFS);
 
-    ScanInstance.setFileManager(FileMgr);
     // Support for virtual file system overlays.
-    FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation(
+    auto FS = createVFSFromCompilerInvocation(
         ScanInstance.getInvocation(), ScanInstance.getDiagnostics(),
-        FileMgr->getVirtualFileSystemPtr()));
+        DriverFileMgr->getVirtualFileSystemPtr());
 
+    // Create a new FileManager to match the invocation's FileSystemOptions.
+    auto *FileMgr = ScanInstance.createFileManager(FS);
     ScanInstance.createSourceManager(*FileMgr);
 
     // Store the list of prebuilt module files into header search options. This
@@ -624,9 +625,8 @@ bool DependencyScanningWorker::computeDependencies(
       ModifiedCommandLine ? *ModifiedCommandLine : CommandLine;
   auto &FinalFS = ModifiedFS ? ModifiedFS : BaseFS;
 
-  FileSystemOptions FSOpts;
-  FSOpts.WorkingDir = WorkingDirectory.str();
-  auto FileMgr = llvm::makeIntrusiveRefCnt<FileManager>(FSOpts, FinalFS);
+  auto FileMgr =
+      llvm::makeIntrusiveRefCnt<FileManager>(FileSystemOptions{}, FinalFS);
 
   std::vector<const char *> FinalCCommandLine(FinalCommandLine.size(), nullptr);
   llvm::transform(FinalCommandLine, FinalCCommandLine.begin(),
diff --git a/clang/test/ClangScanDeps/working-directory-option.c b/clang/test/ClangScanDeps/working-directory-option.c
new file mode 100644
index 000000000000..d57497d405d3
--- /dev/null
+++ b/clang/test/ClangScanDeps/working-directory-option.c
@@ -0,0 +1,30 @@
+// Test that -working-directory works even when it differs from the working
+// directory of the filesystem.
+
+// RUN: rm -rf %t
+// RUN: mkdir -p %t/other
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full \
+// RUN:   > %t/deps.json
+
+// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
+
+// CHECK:      "file-deps": [
+// CHECK-NEXT:   "[[PREFIX]]/cwd/t.c"
+// CHECK-NEXT:   "[[PREFIX]]/cwd/relative/h1.h"
+// CHECK-NEXT: ]
+// CHECK-NEXT: "input-file": "[[PREFIX]]/cwd/t.c"
+
+//--- cdb.json.template
+[{
+  "directory": "DIR/other",
+  "command": "clang -c t.c -I relative -working-directory DIR/cwd",
+  "file": "DIR/cwd/t.c"
+}]
+
+//--- cwd/relative/h1.h
+
+//--- cwd/t.c
+#include "h1.h"
-- 
cgit v1.2.3


From 3238b92142c8fb70a1b2c72ee06bb47d57229dc9 Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
Date: Tue, 12 Mar 2024 15:19:30 +0100
Subject: [LoopSimplifyCFG] Drop no longer needed DependenceAnalysis.h include

---
 llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 028a487ecdbc..ae9103d0608a 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -16,7 +16,6 @@
 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
-- 
cgit v1.2.3


From 4d0f79e346ceb0ddb25a94053c612a5b34a72100 Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
Date: Tue, 12 Mar 2024 16:02:02 +0100
Subject: Pre commit test cases SRL/SRA support in canCreateUndefOrPoison. NFC

Add test cases to show that we can't push freeze through SRA/SRL with
'exact' flag when there are multiple uses.
---
 llvm/test/CodeGen/X86/freeze-binary.ll | 48 ++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/llvm/test/CodeGen/X86/freeze-binary.ll b/llvm/test/CodeGen/X86/freeze-binary.ll
index defd81e6ab77..b212e9438e1b 100644
--- a/llvm/test/CodeGen/X86/freeze-binary.ll
+++ b/llvm/test/CodeGen/X86/freeze-binary.ll
@@ -488,6 +488,30 @@ define i32 @freeze_ashr_exact(i32 %a0) nounwind {
   ret i32 %z
 }
 
+define i32 @freeze_ashr_exact_extra_use(i32 %a0, ptr %escape) nounwind {
+; X86-LABEL: freeze_ashr_exact_extra_use:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sarl $3, %eax
+; X86-NEXT:    movl %eax, (%ecx)
+; X86-NEXT:    sarl $6, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: freeze_ashr_exact_extra_use:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    sarl $3, %eax
+; X64-NEXT:    movl %eax, (%rsi)
+; X64-NEXT:    sarl $6, %eax
+; X64-NEXT:    retq
+  %x = ashr exact i32 %a0, 3
+  %y = freeze i32 %x
+  %z = ashr i32 %y, 6
+  store i32 %x, ptr %escape
+  ret i32 %z
+}
+
 define i32 @freeze_ashr_outofrange(i32 %a0) nounwind {
 ; X86-LABEL: freeze_ashr_outofrange:
 ; X86:       # %bb.0:
@@ -597,6 +621,30 @@ define i32 @freeze_lshr_exact(i32 %a0) nounwind {
   ret i32 %z
 }
 
+define i32 @freeze_lshr_exact_extra_use(i32 %a0, ptr %escape) nounwind {
+; X86-LABEL: freeze_lshr_exact_extra_use:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $3, %eax
+; X86-NEXT:    movl %eax, (%ecx)
+; X86-NEXT:    shrl $5, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: freeze_lshr_exact_extra_use:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shrl $3, %eax
+; X64-NEXT:    movl %eax, (%rsi)
+; X64-NEXT:    shrl $5, %eax
+; X64-NEXT:    retq
+  %x = lshr exact i32 %a0, 3
+  %y = freeze i32 %x
+  %z = lshr i32 %y, 5
+  store i32 %x, ptr %escape
+  ret i32 %z
+}
+
 define i32 @freeze_lshr_outofrange(i32 %a0) nounwind {
 ; X86-LABEL: freeze_lshr_outofrange:
 ; X86:       # %bb.0:
-- 
cgit v1.2.3


From beba307c5bc206168bdea3b893e02ea31579fe62 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 12 Mar 2024 16:23:25 +0100
Subject: [LSR] Clear SCEVExpander before deleting phi nodes

Fixes https://github.com/llvm/llvm-project/issues/84709.
---
 llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp  |  2 ++
 llvm/test/Transforms/LoopStrengthReduce/pr84709.ll | 34 ++++++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopStrengthReduce/pr84709.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 8b078ddc4e74..c4e1a0db8b32 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6971,6 +6971,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
     Rewriter.setDebugType(DEBUG_TYPE);
 #endif
     unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
+    Rewriter.clear();
     if (numFolded) {
       Changed = true;
       RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
@@ -6989,6 +6990,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
     SCEVExpander Rewriter(SE, DL, "lsr", true);
     int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT,
                                          UnusedIndVarInLoop, DeadInsts);
+    Rewriter.clear();
     if (Rewrites) {
       Changed = true;
       RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr84709.ll b/llvm/test/Transforms/LoopStrengthReduce/pr84709.ll
new file mode 100644
index 000000000000..99794d01242c
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/pr84709.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=loop-reduce < %s | FileCheck %s
+
+; Make sure it does not assert.
+define i64 @test() {
+; CHECK-LABEL: define i64 @test() {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br i1 true, label [[BB5:%.*]], label [[BB2]]
+; CHECK:       bb5:
+; CHECK-NEXT:    br label [[BB1]]
+;
+bb:
+  br label %bb1
+
+bb1:
+  %phi = phi i8 [ %zext6, %bb5 ], [ 0, %bb ]
+  br label %bb2
+
+bb2:
+  %phi3 = phi i8 [ %add, %bb2 ], [ %phi, %bb1 ]
+  %phi4 = phi i32 [ 0, %bb2 ], [ 1, %bb1 ]
+  %add = add i8 %phi3, 1
+  br i1 true, label %bb5, label %bb2
+
+bb5:
+  %zext = zext i8 %add to i32
+  %icmp = icmp sge i32 %phi4, 0
+  %zext6 = zext i1 %icmp to i8
+  br label %bb1
+}
-- 
cgit v1.2.3


From 08dd645c15a091a53313e278d8f3c090e7c385d1 Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Tue, 12 Mar 2024 16:26:49 +0100
Subject: [RISC-V] Bad immediate value for Zcmp instructions with E extension
 (#84925)

When we are using the Zcmp extension together with the E extension in
32-bit mode and we need to spill both callee-saved registers as well as
needing a couple of 32-bit stack slots, we emit a meaningless stack
adjustment with cm.push/cm.popret. Furthermore this leads to the stack
slot for the ra being clobbered so control returns to a random location.

This is just a pre-commit test so that the PR for the fix shows the
difference in code generation.
---
 llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll | 49 ++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll

diff --git a/llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll b/llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll
new file mode 100644
index 000000000000..e5c2e0180ee0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+zcmp,+e -target-abi ilp32e -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
+define ptr @func(ptr %s, i32 %_c, ptr %incdec.ptr, i1 %0, i8 %conv14) #0 {
+; RV32-LABEL: func:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    cm.push {ra, s0-s1}, -24
+; RV32-NEXT:    .cfi_def_cfa_offset 24
+; RV32-NEXT:    .cfi_offset ra, -12
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    .cfi_offset s1, -4
+; RV32-NEXT:    sw a4, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a2, 0(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    mv s1, a0
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    andi a3, a3, 1
+; RV32-NEXT:  .LBB0_1: # %while.body
+; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    bnez a3, .LBB0_1
+; RV32-NEXT:  # %bb.2: # %while.end
+; RV32-NEXT:    lui a0, 4112
+; RV32-NEXT:    addi a1, a0, 257
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    call __mulsi3
+; RV32-NEXT:    sw a0, 0(zero)
+; RV32-NEXT:    andi s0, s0, 1
+; RV32-NEXT:    lw a0, 0(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s0, s0, a0
+; RV32-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sb a0, 0(s0)
+; RV32-NEXT:    mv a0, s1
+; RV32-NEXT:    cm.popret {ra, s0-s1}, 24
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %n.addr.042 = phi i32 [ 1, %entry ], [ 0, %while.body ]
+  br i1 %0, label %while.body, label %while.end
+
+while.end:                                        ; preds = %while.body
+  %or5 = mul i32 %_c, 16843009
+  store i32 %or5, ptr null, align 4
+  %1 = and i32 %n.addr.042, 1
+  %scevgep = getelementptr i8, ptr %incdec.ptr, i32 %1
+  store i8 %conv14, ptr %scevgep, align 1
+  ret ptr %s
+}
-- 
cgit v1.2.3


From bae47d48b632a4fa1dce5591bc0783360cf69e28 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <nickdesaulniers@users.noreply.github.com>
Date: Tue, 12 Mar 2024 08:38:34 -0700
Subject: [libc] fix another build failure from using limits.h (#84827)

My GCC build is failing with issues similar why we added our own. Looks
like
we missed one spot. See also:

commit 72ce62941579 ("[libc] Add C23 limits.h header. (#78887)")
---
 libc/test/src/__support/CMakeLists.txt             | 1 +
 libc/test/src/__support/integer_to_string_test.cpp | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 91dd0dc4decf..51b897f8b595 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -78,6 +78,7 @@ add_libc_test(
   SRCS
     integer_to_string_test.cpp
   DEPENDS
+    libc.src.__support.CPP.limits
     libc.src.__support.CPP.string_view
     libc.src.__support.integer_literals
     libc.src.__support.integer_to_string
diff --git a/libc/test/src/__support/integer_to_string_test.cpp b/libc/test/src/__support/integer_to_string_test.cpp
index a2a80c81b9f6..270fddd828b6 100644
--- a/libc/test/src/__support/integer_to_string_test.cpp
+++ b/libc/test/src/__support/integer_to_string_test.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/__support/CPP/limits.h"
 #include "src/__support/CPP/span.h"
 #include "src/__support/CPP/string_view.h"
 #include "src/__support/UInt.h"
@@ -15,8 +16,6 @@
 
 #include "test/UnitTest/Test.h"
 
-#include "limits.h"
-
 using LIBC_NAMESPACE::IntegerToString;
 using LIBC_NAMESPACE::cpp::span;
 using LIBC_NAMESPACE::cpp::string_view;
-- 
cgit v1.2.3


From f0c0ddae45ec929d023232d2ff0b75b7f09853c2 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <nickdesaulniers@users.noreply.github.com>
Date: Tue, 12 Mar 2024 08:39:17 -0700
Subject: [libc] implement the final macros for stdbit.h support (#84798)

Relevant sections of n3096:
- 7.18.1p1
- 7.18.2
---
 libc/docs/c23.rst                             |  2 +-
 libc/docs/stdbit.rst                          |  8 ++++----
 libc/include/llvm-libc-macros/stdbit-macros.h |  5 +++++
 libc/spec/stdc.td                             |  4 ++++
 libc/test/include/stdbit_test.cpp             | 17 +++++++++++++++++
 5 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/libc/docs/c23.rst b/libc/docs/c23.rst
index 24cef8539393..3f64722bc8e6 100644
--- a/libc/docs/c23.rst
+++ b/libc/docs/c23.rst
@@ -75,7 +75,7 @@ Additions:
   * dfmal
   * fsqrt*
   * dsqrtl
-* stdbit.h (New header)
+* stdbit.h (New header)    |check|
 * stdckdint.h (New header) |check|
 * stddef.h
 
diff --git a/libc/docs/stdbit.rst b/libc/docs/stdbit.rst
index 9b4974cf1479..d42f79382462 100644
--- a/libc/docs/stdbit.rst
+++ b/libc/docs/stdbit.rst
@@ -110,10 +110,10 @@ Macros
 =========================  =========
 Macro Name                 Available
 =========================  =========
-__STDC_VERSION_STDBIT_H__
-__STDC_ENDIAN_LITTLE__
-__STDC_ENDIAN_BIG__
-__STDC_ENDIAN_NATIVE__
+__STDC_VERSION_STDBIT_H__  |check|
+__STDC_ENDIAN_LITTLE__     |check|
+__STDC_ENDIAN_BIG__        |check|
+__STDC_ENDIAN_NATIVE__     |check|
 stdc_leading_zeros         |check|
 stdc_leading_ones          |check|
 stdc_trailing_zeros        |check|
diff --git a/libc/include/llvm-libc-macros/stdbit-macros.h b/libc/include/llvm-libc-macros/stdbit-macros.h
index 10c0fac3c8dd..c5b2f0977834 100644
--- a/libc/include/llvm-libc-macros/stdbit-macros.h
+++ b/libc/include/llvm-libc-macros/stdbit-macros.h
@@ -9,6 +9,11 @@
 #ifndef __LLVM_LIBC_MACROS_STDBIT_MACROS_H
 #define __LLVM_LIBC_MACROS_STDBIT_MACROS_H
 
+#define __STDC_VERSION_STDBIT_H__ 202311L
+#define __STDC_ENDIAN_LITTLE__ __ORDER_LITTLE_ENDIAN__
+#define __STDC_ENDIAN_BIG__ __ORDER_BIG_ENDIAN__
+#define __STDC_ENDIAN_NATIVE__ __BYTE_ORDER__
+
 // TODO(https://github.com/llvm/llvm-project/issues/80509): support _BitInt().
 #ifdef __cplusplus
 inline unsigned stdc_leading_zeros(unsigned char x) {
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 1f14fe758130..1f9917b1f073 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -805,6 +805,10 @@ def StdC : StandardSpec<"stdc"> {
   HeaderSpec StdBit = HeaderSpec<
       "stdbit.h",
       [
+        Macro<"__STDC_VERSION_STDBIT_H__">,
+        Macro<"__STDC_ENDIAN_LITTLE__">,
+        Macro<"__STDC_ENDIAN_BIG__">,
+        Macro<"__STDC_ENDIAN_NATIVE__">,
         Macro<"stdc_leading_zeros">,
         Macro<"stdc_leading_ones">,
         Macro<"stdc_trailing_zeros">,
diff --git a/libc/test/include/stdbit_test.cpp b/libc/test/include/stdbit_test.cpp
index f3227eb86959..bee1a19f9c03 100644
--- a/libc/test/include/stdbit_test.cpp
+++ b/libc/test/include/stdbit_test.cpp
@@ -141,3 +141,20 @@ TEST(LlvmLibcStdbitTest, TypeGenericMacroBitCeil) {
   EXPECT_EQ(stdc_bit_ceil(0UL), 0x6DUL);
   EXPECT_EQ(stdc_bit_ceil(0ULL), 0x6EULL);
 }
+
+TEST(LlvmLibcStdbitTest, VersionMacro) {
+  // 7.18.1p2 an integer constant expression with a value equivalent to 202311L.
+  EXPECT_EQ(__STDC_VERSION_STDBIT_H__, 202311L);
+}
+
+TEST(LlvmLibcStdbitTest, EndianMacros) {
+  // 7.18.2p3 The values of the integer constant expressions for
+  // __STDC_ENDIAN_LITTLE__ and __STDC_ENDIAN_BIG__ are not equal.
+  EXPECT_NE(__STDC_ENDIAN_LITTLE__, __STDC_ENDIAN_BIG__);
+  // The standard does allow for __STDC_ENDIAN_NATIVE__ to be an integer
+  // constant expression with an implementation defined value for non-big or
+  // little endianness environments.  I assert such machines are no longer
+  // relevant.
+  EXPECT_TRUE(__STDC_ENDIAN_NATIVE__ == __STDC_ENDIAN_LITTLE__ ||
+              __STDC_ENDIAN_NATIVE__ == __STDC_ENDIAN_BIG__);
+}
-- 
cgit v1.2.3


From 9f69d3cf88905df5006f93dce536b7e73c0b1735 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Tue, 12 Mar 2024 10:39:40 -0500
Subject: [Libomptarget] Use NVPTX lane id intrinsic in DeviceRTL (#84928)

Summary:
We are currently taking the lower 5 bites of the thread ID as the warp
ID. This doesn't work in non-1D grids and is also slower than just using
the dedicated hardware register.
---
 openmp/libomptarget/DeviceRTL/src/Mapping.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index 31dd8054dec3..b2028a8fb4f5 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -172,10 +172,7 @@ uint32_t getThreadIdInBlock(int32_t Dim) {
   UNREACHABLE("Dim outside range!");
 }
 
-uint32_t getThreadIdInWarp() {
-  return impl::getThreadIdInBlock(mapping::DIM_X) &
-         (mapping::getWarpSize() - 1);
-}
+uint32_t getThreadIdInWarp() { return __nvvm_read_ptx_sreg_laneid(); }
 
 uint32_t getBlockIdInKernel(int32_t Dim) {
   switch (Dim) {
-- 
cgit v1.2.3


From 392436383a52bc5e188bd28bec5bc71b3cb5384a Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <nickdesaulniers@users.noreply.github.com>
Date: Tue, 12 Mar 2024 08:39:50 -0700
Subject: [libc] fix typo in stdbit.h macro spec files (#84780)

---
 libc/spec/stdc.td | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 1f9917b1f073..e012d0dee089 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -820,9 +820,9 @@ def StdC : StandardSpec<"stdc"> {
         Macro<"stdc_count_zeros">,
         Macro<"stdc_count_ones">,
         Macro<"stdc_has_single_bit">,
-        Macro<"std_bit_width">,
-        Macro<"std_bit_floor">,
-        Macro<"std_bit_ceil">
+        Macro<"stdc_bit_width">,
+        Macro<"stdc_bit_floor">,
+        Macro<"stdc_bit_ceil">
       ], // Macros
       [], // Types
       [], // Enumerations
-- 
cgit v1.2.3


From c167a2588737613558bd7be4c9280603e89281ac Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Tue, 12 Mar 2024 10:40:35 -0500
Subject: [libc] Fix lane-id utility function not using built-in (#84902)

Summary:
Previously we got the lane-id from taking the global thread ID and
taking off the bottom 5 bits. This works but is inefficient compared to
the NVPTX intrinsic simply dedicated to get this value.
---
 libc/src/__support/GPU/nvptx/utils.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h
index a92c8847b6ec..fe9da4e8e6cb 100644
--- a/libc/src/__support/GPU/nvptx/utils.h
+++ b/libc/src/__support/GPU/nvptx/utils.h
@@ -97,7 +97,7 @@ LIBC_INLINE uint32_t get_lane_size() { return 32; }
 
 /// Returns the id of the thread inside of a CUDA warp executing together.
 [[clang::convergent]] LIBC_INLINE uint32_t get_lane_id() {
-  return get_thread_id() & (get_lane_size() - 1);
+  return __nvvm_read_ptx_sreg_laneid();
 }
 
 /// Returns the bit-mask of active threads in the current warp.
-- 
cgit v1.2.3


From 261e5648e70b363aecf86acfcd7fb416eb48fb7b Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Tue, 12 Mar 2024 10:40:49 -0500
Subject: [libc] Add utility functions for warp-level scan and reduction
 (#84866)

Summary:
The GPU uses a SIMT execution model. That means that each value actually
belongs to a group of 32 or 64 other lanes executing next to it. These
platforms offer some intrinsic fuctions to actually take elements from
neighboring lanes. With these we can do parallel scans or reductions.
These functions do not have an immediate user, but will be used in the
allocator interface that is in-progress and are generally good to have.
This patch is a precommit for these new utilitly functions.
---
 libc/src/__support/GPU/amdgpu/utils.h              |  6 +++
 libc/src/__support/GPU/generic/utils.h             |  2 +
 libc/src/__support/GPU/nvptx/utils.h               |  8 +++
 libc/src/__support/GPU/utils.h                     | 19 +++++++
 libc/test/integration/src/__support/CMakeLists.txt |  3 ++
 .../integration/src/__support/GPU/CMakeLists.txt   | 11 ++++
 .../integration/src/__support/GPU/scan_reduce.cpp  | 62 ++++++++++++++++++++++
 7 files changed, 111 insertions(+)
 create mode 100644 libc/test/integration/src/__support/GPU/CMakeLists.txt
 create mode 100644 libc/test/integration/src/__support/GPU/scan_reduce.cpp

diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h
index 75f0b5744ebd..9b520a6bcf38 100644
--- a/libc/src/__support/GPU/amdgpu/utils.h
+++ b/libc/src/__support/GPU/amdgpu/utils.h
@@ -145,6 +145,12 @@ LIBC_INLINE uint32_t get_lane_size() {
   __builtin_amdgcn_wave_barrier();
 }
 
+/// Shuffles the the lanes inside the wavefront according to the given index.
+[[clang::convergent]] LIBC_INLINE uint32_t shuffle(uint64_t, uint32_t idx,
+                                                   uint32_t x) {
+  return __builtin_amdgcn_ds_bpermute(idx << 2, x);
+}
+
 /// Returns the current value of the GPU's processor clock.
 /// NOTE: The RDNA3 and RDNA2 architectures use a 20-bit cycle counter.
 LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); }
diff --git a/libc/src/__support/GPU/generic/utils.h b/libc/src/__support/GPU/generic/utils.h
index c6c3c01cf7d5..b6df59f7aa9e 100644
--- a/libc/src/__support/GPU/generic/utils.h
+++ b/libc/src/__support/GPU/generic/utils.h
@@ -67,6 +67,8 @@ LIBC_INLINE void sync_threads() {}
 
 LIBC_INLINE void sync_lane(uint64_t) {}
 
+LIBC_INLINE uint32_t shuffle(uint64_t, uint32_t, uint32_t x) { return x; }
+
 LIBC_INLINE uint64_t processor_clock() { return 0; }
 
 LIBC_INLINE uint64_t fixed_frequency_clock() { return 0; }
diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h
index fe9da4e8e6cb..3f19afb83648 100644
--- a/libc/src/__support/GPU/nvptx/utils.h
+++ b/libc/src/__support/GPU/nvptx/utils.h
@@ -126,6 +126,14 @@ LIBC_INLINE uint32_t get_lane_size() { return 32; }
   __nvvm_bar_warp_sync(static_cast<uint32_t>(mask));
 }
 
+/// Shuffles the the lanes inside the warp according to the given index.
+[[clang::convergent]] LIBC_INLINE uint32_t shuffle(uint64_t lane_mask,
+                                                   uint32_t idx, uint32_t x) {
+  uint32_t mask = static_cast<uint32_t>(lane_mask);
+  uint32_t bitmask = (mask >> idx) & 1;
+  return -bitmask & __nvvm_shfl_sync_idx_i32(mask, x, idx, get_lane_size() - 1);
+}
+
 /// Returns the current value of the GPU's processor clock.
 LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); }
 
diff --git a/libc/src/__support/GPU/utils.h b/libc/src/__support/GPU/utils.h
index 0f9167cdee06..93022e8de811 100644
--- a/libc/src/__support/GPU/utils.h
+++ b/libc/src/__support/GPU/utils.h
@@ -31,6 +31,25 @@ LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
   return gpu::get_lane_id() == get_first_lane_id(lane_mask);
 }
 
+/// Gets the sum of all lanes inside the warp or wavefront.
+LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) {
+  for (uint32_t step = gpu::get_lane_size() / 2; step > 0; step /= 2) {
+    uint32_t index = step + gpu::get_lane_id();
+    x += gpu::shuffle(lane_mask, index, x);
+  }
+  return gpu::broadcast_value(lane_mask, x);
+}
+
+/// Gets the accumulator scan of the threads in the warp or wavefront.
+LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) {
+  for (uint32_t step = 1; step < gpu::get_lane_size(); step *= 2) {
+    uint32_t index = gpu::get_lane_id() - step;
+    uint32_t bitmask = gpu::get_lane_id() >= step;
+    x += -bitmask & gpu::shuffle(lane_mask, index, x);
+  }
+  return x;
+}
+
 } // namespace gpu
 } // namespace LIBC_NAMESPACE
 
diff --git a/libc/test/integration/src/__support/CMakeLists.txt b/libc/test/integration/src/__support/CMakeLists.txt
index 7c853ff10259..b5b6557e8d68 100644
--- a/libc/test/integration/src/__support/CMakeLists.txt
+++ b/libc/test/integration/src/__support/CMakeLists.txt
@@ -1 +1,4 @@
 add_subdirectory(threads)
+if(LIBC_TARGET_OS_IS_GPU)
+  add_subdirectory(GPU)
+endif()
diff --git a/libc/test/integration/src/__support/GPU/CMakeLists.txt b/libc/test/integration/src/__support/GPU/CMakeLists.txt
new file mode 100644
index 000000000000..7811e0da45dd
--- /dev/null
+++ b/libc/test/integration/src/__support/GPU/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_custom_target(libc-support-gpu-tests)
+add_dependencies(libc-integration-tests libc-support-gpu-tests)
+
+add_integration_test(
+  scan_reduce_test
+  SUITE libc-support-gpu-tests
+  SRCS
+    scan_reduce.cpp
+  LOADER_ARGS
+    --threads 64
+)
diff --git a/libc/test/integration/src/__support/GPU/scan_reduce.cpp b/libc/test/integration/src/__support/GPU/scan_reduce.cpp
new file mode 100644
index 000000000000..bc621c3300cb
--- /dev/null
+++ b/libc/test/integration/src/__support/GPU/scan_reduce.cpp
@@ -0,0 +1,62 @@
+//===-- Test for the parallel scan and reduction operations on the GPU ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/bit.h"
+#include "src/__support/GPU/utils.h"
+#include "test/IntegrationTest/test.h"
+
+using namespace LIBC_NAMESPACE;
+
+static uint32_t sum(uint32_t n) { return n * (n + 1) / 2; }
+
+// Tests a reduction within a convergant warp or wavefront using some known
+// values. For example, if every element in the lane is one, then the sum should
+// be the size of the warp or wavefront, i.e. 1 + 1 + 1 ... + 1.
+static void test_reduce() {
+  uint64_t mask = gpu::get_lane_mask();
+  uint32_t x = gpu::reduce(mask, 1);
+  EXPECT_EQ(x, gpu::get_lane_size());
+
+  uint32_t y = gpu::reduce(mask, gpu::get_lane_id());
+  EXPECT_EQ(y, sum(gpu::get_lane_size() - 1));
+
+  uint32_t z = 0;
+  if (gpu::get_lane_id() % 2)
+    z = gpu::reduce(gpu::get_lane_mask(), 1);
+  gpu::sync_lane(mask);
+
+  EXPECT_EQ(z, gpu::get_lane_id() % 2 ? gpu::get_lane_size() / 2 : 0);
+}
+
+// Tests an accumulation scan within a convergent warp or wavefront using some
+// known values. For example, if every element in the lane is one, then the scan
+// should have each element be equivalent to its ID, i.e. 1, 1 + 1, ...
+static void test_scan() {
+  uint64_t mask = gpu::get_lane_mask();
+
+  uint32_t x = gpu::scan(mask, 1);
+  EXPECT_EQ(x, gpu::get_lane_id() + 1);
+
+  uint32_t y = gpu::scan(mask, gpu::get_lane_id());
+  EXPECT_EQ(y, sum(gpu::get_lane_id()));
+
+  uint32_t z = 0;
+  if (gpu::get_lane_id() % 2)
+    z = gpu::scan(gpu::get_lane_mask(), 1);
+  gpu::sync_lane(mask);
+
+  EXPECT_EQ(z, gpu::get_lane_id() % 2 ? gpu::get_lane_id() / 2 + 1 : 0);
+}
+
+TEST_MAIN(int argc, char **argv, char **envp) {
+  test_reduce();
+
+  test_scan();
+
+  return 0;
+}
-- 
cgit v1.2.3


From 0ebf511ad011a83022edb171e044c98d9d16b1fa Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <nickdesaulniers@users.noreply.github.com>
Date: Tue, 12 Mar 2024 08:44:06 -0700
Subject: [libc] move non <bit> functions to math_extras (#84818)

As per TODOs added in

https://github.com/llvm/llvm-project/pull/84035/commits/48b0bc837085a38ff1de33010d9222363f70238f.
---
 libc/src/__support/CMakeLists.txt                |  1 +
 libc/src/__support/CPP/bit.h                     | 37 -------------------
 libc/src/__support/math_extras.h                 | 37 ++++++++++++++++++-
 libc/src/stdbit/CMakeLists.txt                   | 46 ++++++++++++++----------
 libc/src/stdbit/stdc_count_zeros_uc.cpp          |  4 +--
 libc/src/stdbit/stdc_count_zeros_ui.cpp          |  4 +--
 libc/src/stdbit/stdc_count_zeros_ul.cpp          |  4 +--
 libc/src/stdbit/stdc_count_zeros_ull.cpp         |  4 +--
 libc/src/stdbit/stdc_count_zeros_us.cpp          |  4 +--
 libc/src/stdbit/stdc_first_leading_one_uc.cpp    |  4 +--
 libc/src/stdbit/stdc_first_leading_one_ui.cpp    |  4 +--
 libc/src/stdbit/stdc_first_leading_one_ul.cpp    |  4 +--
 libc/src/stdbit/stdc_first_leading_one_ull.cpp   |  4 +--
 libc/src/stdbit/stdc_first_leading_one_us.cpp    |  4 +--
 libc/src/stdbit/stdc_first_leading_zero_uc.cpp   |  4 +--
 libc/src/stdbit/stdc_first_leading_zero_ui.cpp   |  4 +--
 libc/src/stdbit/stdc_first_leading_zero_ul.cpp   |  4 +--
 libc/src/stdbit/stdc_first_leading_zero_ull.cpp  |  4 +--
 libc/src/stdbit/stdc_first_leading_zero_us.cpp   |  4 +--
 libc/src/stdbit/stdc_first_trailing_one_uc.cpp   |  4 +--
 libc/src/stdbit/stdc_first_trailing_one_ui.cpp   |  4 +--
 libc/src/stdbit/stdc_first_trailing_one_ul.cpp   |  4 +--
 libc/src/stdbit/stdc_first_trailing_one_ull.cpp  |  4 +--
 libc/src/stdbit/stdc_first_trailing_one_us.cpp   |  4 +--
 libc/src/stdbit/stdc_first_trailing_zero_uc.cpp  |  4 +--
 libc/src/stdbit/stdc_first_trailing_zero_ui.cpp  |  4 +--
 libc/src/stdbit/stdc_first_trailing_zero_ul.cpp  |  4 +--
 libc/src/stdbit/stdc_first_trailing_zero_ull.cpp |  4 +--
 libc/src/stdbit/stdc_first_trailing_zero_us.cpp  |  4 +--
 libc/test/src/__support/CPP/bit_test.cpp         | 32 -----------------
 libc/test/src/__support/math_extras_test.cpp     | 40 +++++++++++++++++++++
 31 files changed, 154 insertions(+), 139 deletions(-)

diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 2e5a026bf423..4c1f271e1df4 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -34,6 +34,7 @@ add_header_library(
   HDRS
     math_extras.h
   DEPENDS
+    libc.src.__support.CPP.bit
     libc.src.__support.CPP.limits
     libc.src.__support.CPP.type_traits
     libc.src.__support.macros.attributes
diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h
index 1a05728b8506..3f2fbec94405 100644
--- a/libc/src/__support/CPP/bit.h
+++ b/libc/src/__support/CPP/bit.h
@@ -239,36 +239,6 @@ LIBC_INLINE constexpr To bit_or_static_cast(const From &from) {
   }
 }
 
-// TODO: remove from 'bit.h' as it is not a standard function.
-template <typename T>
-[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
-first_leading_zero(T value) {
-  return value == cpp::numeric_limits<T>::max() ? 0 : countl_one(value) + 1;
-}
-
-// TODO: remove from 'bit.h' as it is not a standard function.
-template <typename T>
-[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
-first_leading_one(T value) {
-  return first_leading_zero(static_cast<T>(~value));
-}
-
-// TODO: remove from 'bit.h' as it is not a standard function.
-template <typename T>
-[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
-first_trailing_zero(T value) {
-  return value == cpp::numeric_limits<T>::max()
-             ? 0
-             : countr_zero(static_cast<T>(~value)) + 1;
-}
-
-// TODO: remove from 'bit.h' as it is not a standard function.
-template <typename T>
-[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
-first_trailing_one(T value) {
-  return value == cpp::numeric_limits<T>::max() ? 0 : countr_zero(value) + 1;
-}
-
 /// Count number of 1's aka population count or Hamming weight.
 ///
 /// Only unsigned integral types are allowed.
@@ -294,13 +264,6 @@ ADD_SPECIALIZATION(unsigned long long, __builtin_popcountll)
 // TODO: 128b specializations?
 #undef ADD_SPECIALIZATION
 
-// TODO: remove from 'bit.h' as it is not a standard function.
-template <typename T>
-[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
-count_zeros(T value) {
-  return popcount<T>(static_cast<T>(~value));
-}
-
 } // namespace LIBC_NAMESPACE::cpp
 
 #endif // LLVM_LIBC_SRC___SUPPORT_CPP_BIT_H
diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h
index c6b458ddecda..28ee1be8b999 100644
--- a/libc/src/__support/math_extras.h
+++ b/libc/src/__support/math_extras.h
@@ -10,7 +10,8 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H
 #define LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H
 
-#include "src/__support/CPP/limits.h"        // CHAR_BIT
+#include "src/__support/CPP/bit.h"           // countl_one, countr_zero
+#include "src/__support/CPP/limits.h"        // CHAR_BIT, numeric_limits
 #include "src/__support/CPP/type_traits.h"   // is_unsigned_v
 #include "src/__support/macros/attributes.h" // LIBC_INLINE
 #include "src/__support/macros/config.h"     // LIBC_HAS_BUILTIN
@@ -226,6 +227,40 @@ sub_with_borrow<unsigned long long>(unsigned long long a, unsigned long long b,
 
 #endif // LIBC_HAS_BUILTIN(__builtin_subc)
 
+template <typename T>
+[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
+first_leading_zero(T value) {
+  return value == cpp::numeric_limits<T>::max() ? 0
+                                                : cpp::countl_one(value) + 1;
+}
+
+template <typename T>
+[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
+first_leading_one(T value) {
+  return first_leading_zero(static_cast<T>(~value));
+}
+
+template <typename T>
+[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
+first_trailing_zero(T value) {
+  return value == cpp::numeric_limits<T>::max()
+             ? 0
+             : cpp::countr_zero(static_cast<T>(~value)) + 1;
+}
+
+template <typename T>
+[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
+first_trailing_one(T value) {
+  return value == cpp::numeric_limits<T>::max() ? 0
+                                                : cpp::countr_zero(value) + 1;
+}
+
+template <typename T>
+[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
+count_zeros(T value) {
+  return cpp::popcount<T>(static_cast<T>(~value));
+}
+
 } // namespace LIBC_NAMESPACE
 
 #endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H
diff --git a/libc/src/stdbit/CMakeLists.txt b/libc/src/stdbit/CMakeLists.txt
index 2aef2029f2df..0c22b1d2617a 100644
--- a/libc/src/stdbit/CMakeLists.txt
+++ b/libc/src/stdbit/CMakeLists.txt
@@ -1,30 +1,38 @@
+function(declare_dependencies prefixes dependencies)
+  set(suffixes c s i l ll)
+  foreach(prefix ${prefixes})
+    foreach(suffix IN LISTS suffixes)
+      add_entrypoint_object(
+        stdc_${prefix}_u${suffix}
+        SRCS
+          stdc_${prefix}_u${suffix}.cpp
+        HDRS
+          stdc_${prefix}_u${suffix}.h
+        DEPENDS
+          ${dependencies}
+      )
+    endforeach()
+  endforeach()
+endfunction()
+
+
 set(prefixes
   leading_zeros
   leading_ones
   trailing_zeros
   trailing_ones
-  first_leading_zero
-  first_leading_one
-  first_trailing_zero
-  first_trailing_one
-  count_zeros
   count_ones
   has_single_bit
   bit_width
   bit_floor
   bit_ceil
 )
-set(suffixes c s i l ll)
-foreach(prefix IN LISTS prefixes)
-  foreach(suffix IN LISTS suffixes)
-    add_entrypoint_object(
-      stdc_${prefix}_u${suffix}
-      SRCS
-        stdc_${prefix}_u${suffix}.cpp
-      HDRS
-        stdc_${prefix}_u${suffix}.h
-      DEPENDS
-        libc.src.__support.CPP.bit
-    )
-  endforeach()
-endforeach()
+declare_dependencies("${prefixes}" libc.src.__support.CPP.bit)
+set(prefixes
+  first_leading_zero
+  first_leading_one
+  first_trailing_zero
+  first_trailing_one
+  count_zeros
+)
+declare_dependencies("${prefixes}" libc.src.__support.math_extras)
diff --git a/libc/src/stdbit/stdc_count_zeros_uc.cpp b/libc/src/stdbit/stdc_count_zeros_uc.cpp
index 22c57bd60c38..309ebb55e0fa 100644
--- a/libc/src/stdbit/stdc_count_zeros_uc.cpp
+++ b/libc/src/stdbit/stdc_count_zeros_uc.cpp
@@ -8,13 +8,13 @@
 
 #include "src/stdbit/stdc_count_zeros_uc.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_count_zeros_uc, (unsigned char value)) {
-  return static_cast<unsigned>(cpp::count_zeros(value));
+  return static_cast<unsigned>(count_zeros(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_count_zeros_ui.cpp b/libc/src/stdbit/stdc_count_zeros_ui.cpp
index 6a1defd9d555..31ea907b24de 100644
--- a/libc/src/stdbit/stdc_count_zeros_ui.cpp
+++ b/libc/src/stdbit/stdc_count_zeros_ui.cpp
@@ -8,13 +8,13 @@
 
 #include "src/stdbit/stdc_count_zeros_ui.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_count_zeros_ui, (unsigned value)) {
-  return static_cast<unsigned>(cpp::count_zeros(value));
+  return static_cast<unsigned>(count_zeros(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_count_zeros_ul.cpp b/libc/src/stdbit/stdc_count_zeros_ul.cpp
index ceab32ef9ac3..f5df5c49f131 100644
--- a/libc/src/stdbit/stdc_count_zeros_ul.cpp
+++ b/libc/src/stdbit/stdc_count_zeros_ul.cpp
@@ -8,13 +8,13 @@
 
 #include "src/stdbit/stdc_count_zeros_ul.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_count_zeros_ul, (unsigned long value)) {
-  return static_cast<unsigned>(cpp::count_zeros(value));
+  return static_cast<unsigned>(count_zeros(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_count_zeros_ull.cpp b/libc/src/stdbit/stdc_count_zeros_ull.cpp
index 2f57f727a691..6a9c8f04a799 100644
--- a/libc/src/stdbit/stdc_count_zeros_ull.cpp
+++ b/libc/src/stdbit/stdc_count_zeros_ull.cpp
@@ -8,13 +8,13 @@
 
 #include "src/stdbit/stdc_count_zeros_ull.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_count_zeros_ull, (unsigned long long value)) {
-  return static_cast<unsigned>(cpp::count_zeros(value));
+  return static_cast<unsigned>(count_zeros(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_count_zeros_us.cpp b/libc/src/stdbit/stdc_count_zeros_us.cpp
index fc06836ee292..c08186ec6e87 100644
--- a/libc/src/stdbit/stdc_count_zeros_us.cpp
+++ b/libc/src/stdbit/stdc_count_zeros_us.cpp
@@ -8,13 +8,13 @@
 
 #include "src/stdbit/stdc_count_zeros_us.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_count_zeros_us, (unsigned short value)) {
-  return static_cast<unsigned>(cpp::count_zeros(value));
+  return static_cast<unsigned>(count_zeros(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_leading_one_uc.cpp b/libc/src/stdbit/stdc_first_leading_one_uc.cpp
index 02871595fdb6..2e28ed3bb6f8 100644
--- a/libc/src/stdbit/stdc_first_leading_one_uc.cpp
+++ b/libc/src/stdbit/stdc_first_leading_one_uc.cpp
@@ -8,13 +8,13 @@
 
 #include "src/stdbit/stdc_first_leading_one_uc.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_one_uc, (unsigned char value)) {
-  return static_cast<unsigned>(cpp::first_leading_one(value));
+  return static_cast<unsigned>(first_leading_one(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_leading_one_ui.cpp b/libc/src/stdbit/stdc_first_leading_one_ui.cpp
index a6c7ef5a8339..a07a39b09d9f 100644
--- a/libc/src/stdbit/stdc_first_leading_one_ui.cpp
+++ b/libc/src/stdbit/stdc_first_leading_one_ui.cpp
@@ -8,13 +8,13 @@
 
 #include "src/stdbit/stdc_first_leading_one_ui.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_one_ui, (unsigned value)) {
-  return static_cast<unsigned>(cpp::first_leading_one(value));
+  return static_cast<unsigned>(first_leading_one(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_leading_one_ul.cpp b/libc/src/stdbit/stdc_first_leading_one_ul.cpp
index d1bcab5dda02..4350fb7826b4 100644
--- a/libc/src/stdbit/stdc_first_leading_one_ul.cpp
+++ b/libc/src/stdbit/stdc_first_leading_one_ul.cpp
@@ -8,13 +8,13 @@
 
 #include "src/stdbit/stdc_first_leading_one_ul.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_one_ul, (unsigned long value)) {
-  return static_cast<unsigned>(cpp::first_leading_one(value));
+  return static_cast<unsigned>(first_leading_one(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_leading_one_ull.cpp b/libc/src/stdbit/stdc_first_leading_one_ull.cpp
index 7be8f1051ec2..57a5ae368e11 100644
--- a/libc/src/stdbit/stdc_first_leading_one_ull.cpp
+++ b/libc/src/stdbit/stdc_first_leading_one_ull.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_leading_one_ull.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_one_ull,
                    (unsigned long long value)) {
-  return static_cast<unsigned>(cpp::first_leading_one(value));
+  return static_cast<unsigned>(first_leading_one(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_leading_one_us.cpp b/libc/src/stdbit/stdc_first_leading_one_us.cpp
index 7a4c7e673f36..f14433b13f35 100644
--- a/libc/src/stdbit/stdc_first_leading_one_us.cpp
+++ b/libc/src/stdbit/stdc_first_leading_one_us.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_leading_one_us.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_one_us,
                    (unsigned short value)) {
-  return static_cast<unsigned>(cpp::first_leading_one(value));
+  return static_cast<unsigned>(first_leading_one(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_leading_zero_uc.cpp b/libc/src/stdbit/stdc_first_leading_zero_uc.cpp
index ffc1d9247406..6e2164256f17 100644
--- a/libc/src/stdbit/stdc_first_leading_zero_uc.cpp
+++ b/libc/src/stdbit/stdc_first_leading_zero_uc.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_leading_zero_uc.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_zero_uc,
                    (unsigned char value)) {
-  return static_cast<unsigned>(cpp::first_leading_zero(value));
+  return static_cast<unsigned>(first_leading_zero(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_leading_zero_ui.cpp b/libc/src/stdbit/stdc_first_leading_zero_ui.cpp
index 1eeab2963e6a..cb733a94c0d8 100644
--- a/libc/src/stdbit/stdc_first_leading_zero_ui.cpp
+++ b/libc/src/stdbit/stdc_first_leading_zero_ui.cpp
@@ -8,13 +8,13 @@
 
 #include "src/stdbit/stdc_first_leading_zero_ui.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_zero_ui, (unsigned value)) {
-  return static_cast<unsigned>(cpp::first_leading_zero(value));
+  return static_cast<unsigned>(first_leading_zero(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_leading_zero_ul.cpp b/libc/src/stdbit/stdc_first_leading_zero_ul.cpp
index 6743d3eda516..8a3930a271ed 100644
--- a/libc/src/stdbit/stdc_first_leading_zero_ul.cpp
+++ b/libc/src/stdbit/stdc_first_leading_zero_ul.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_leading_zero_ul.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_zero_ul,
                    (unsigned long value)) {
-  return static_cast<unsigned>(cpp::first_leading_zero(value));
+  return static_cast<unsigned>(first_leading_zero(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_leading_zero_ull.cpp b/libc/src/stdbit/stdc_first_leading_zero_ull.cpp
index 8128dd3d59a7..5a69197a8299 100644
--- a/libc/src/stdbit/stdc_first_leading_zero_ull.cpp
+++ b/libc/src/stdbit/stdc_first_leading_zero_ull.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_leading_zero_ull.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_zero_ull,
                    (unsigned long long value)) {
-  return static_cast<unsigned>(cpp::first_leading_zero(value));
+  return static_cast<unsigned>(first_leading_zero(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_leading_zero_us.cpp b/libc/src/stdbit/stdc_first_leading_zero_us.cpp
index d931535e7690..6482c8654db3 100644
--- a/libc/src/stdbit/stdc_first_leading_zero_us.cpp
+++ b/libc/src/stdbit/stdc_first_leading_zero_us.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_leading_zero_us.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_zero_us,
                    (unsigned short value)) {
-  return static_cast<unsigned>(cpp::first_leading_zero(value));
+  return static_cast<unsigned>(first_leading_zero(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_trailing_one_uc.cpp b/libc/src/stdbit/stdc_first_trailing_one_uc.cpp
index 6ed35966be61..d3e8825eef00 100644
--- a/libc/src/stdbit/stdc_first_trailing_one_uc.cpp
+++ b/libc/src/stdbit/stdc_first_trailing_one_uc.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_trailing_one_uc.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_one_uc,
                    (unsigned char value)) {
-  return static_cast<unsigned>(cpp::first_trailing_one(value));
+  return static_cast<unsigned>(first_trailing_one(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_trailing_one_ui.cpp b/libc/src/stdbit/stdc_first_trailing_one_ui.cpp
index a89083bd4950..842bd6995050 100644
--- a/libc/src/stdbit/stdc_first_trailing_one_ui.cpp
+++ b/libc/src/stdbit/stdc_first_trailing_one_ui.cpp
@@ -8,13 +8,13 @@
 
 #include "src/stdbit/stdc_first_trailing_one_ui.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_one_ui, (unsigned value)) {
-  return static_cast<unsigned>(cpp::first_trailing_one(value));
+  return static_cast<unsigned>(first_trailing_one(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_trailing_one_ul.cpp b/libc/src/stdbit/stdc_first_trailing_one_ul.cpp
index f30078d0f5ff..0497d1d77811 100644
--- a/libc/src/stdbit/stdc_first_trailing_one_ul.cpp
+++ b/libc/src/stdbit/stdc_first_trailing_one_ul.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_trailing_one_ul.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_one_ul,
                    (unsigned long value)) {
-  return static_cast<unsigned>(cpp::first_trailing_one(value));
+  return static_cast<unsigned>(first_trailing_one(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_trailing_one_ull.cpp b/libc/src/stdbit/stdc_first_trailing_one_ull.cpp
index 2e526a890cda..6e062dd27cdd 100644
--- a/libc/src/stdbit/stdc_first_trailing_one_ull.cpp
+++ b/libc/src/stdbit/stdc_first_trailing_one_ull.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_trailing_one_ull.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_one_ull,
                    (unsigned long long value)) {
-  return static_cast<unsigned>(cpp::first_trailing_one(value));
+  return static_cast<unsigned>(first_trailing_one(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_trailing_one_us.cpp b/libc/src/stdbit/stdc_first_trailing_one_us.cpp
index e4c88e0d7906..e90158f10204 100644
--- a/libc/src/stdbit/stdc_first_trailing_one_us.cpp
+++ b/libc/src/stdbit/stdc_first_trailing_one_us.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_trailing_one_us.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_one_us,
                    (unsigned short value)) {
-  return static_cast<unsigned>(cpp::first_trailing_one(value));
+  return static_cast<unsigned>(first_trailing_one(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_trailing_zero_uc.cpp b/libc/src/stdbit/stdc_first_trailing_zero_uc.cpp
index 5825d5d441c5..a6939f6286b3 100644
--- a/libc/src/stdbit/stdc_first_trailing_zero_uc.cpp
+++ b/libc/src/stdbit/stdc_first_trailing_zero_uc.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_trailing_zero_uc.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_zero_uc,
                    (unsigned char value)) {
-  return static_cast<unsigned>(cpp::first_trailing_zero(value));
+  return static_cast<unsigned>(first_trailing_zero(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_trailing_zero_ui.cpp b/libc/src/stdbit/stdc_first_trailing_zero_ui.cpp
index 3b51b5fa22c3..7a50b696afff 100644
--- a/libc/src/stdbit/stdc_first_trailing_zero_ui.cpp
+++ b/libc/src/stdbit/stdc_first_trailing_zero_ui.cpp
@@ -8,13 +8,13 @@
 
 #include "src/stdbit/stdc_first_trailing_zero_ui.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_zero_ui, (unsigned value)) {
-  return static_cast<unsigned>(cpp::first_trailing_zero(value));
+  return static_cast<unsigned>(first_trailing_zero(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_trailing_zero_ul.cpp b/libc/src/stdbit/stdc_first_trailing_zero_ul.cpp
index abf122944a76..88acbabdf2d9 100644
--- a/libc/src/stdbit/stdc_first_trailing_zero_ul.cpp
+++ b/libc/src/stdbit/stdc_first_trailing_zero_ul.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_trailing_zero_ul.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_zero_ul,
                    (unsigned long value)) {
-  return static_cast<unsigned>(cpp::first_trailing_zero(value));
+  return static_cast<unsigned>(first_trailing_zero(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_trailing_zero_ull.cpp b/libc/src/stdbit/stdc_first_trailing_zero_ull.cpp
index 336e7d6e075f..92df8f284e8b 100644
--- a/libc/src/stdbit/stdc_first_trailing_zero_ull.cpp
+++ b/libc/src/stdbit/stdc_first_trailing_zero_ull.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_trailing_zero_ull.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_zero_ull,
                    (unsigned long long value)) {
-  return static_cast<unsigned>(cpp::first_trailing_zero(value));
+  return static_cast<unsigned>(first_trailing_zero(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdbit/stdc_first_trailing_zero_us.cpp b/libc/src/stdbit/stdc_first_trailing_zero_us.cpp
index b7d05047b272..86caa20dd3bd 100644
--- a/libc/src/stdbit/stdc_first_trailing_zero_us.cpp
+++ b/libc/src/stdbit/stdc_first_trailing_zero_us.cpp
@@ -8,14 +8,14 @@
 
 #include "src/stdbit/stdc_first_trailing_zero_us.h"
 
-#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
+#include "src/__support/math_extras.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_zero_us,
                    (unsigned short value)) {
-  return static_cast<unsigned>(cpp::first_trailing_zero(value));
+  return static_cast<unsigned>(first_trailing_zero(value));
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp
index 3deb1f41dcf8..cee5b90c8f4b 100644
--- a/libc/test/src/__support/CPP/bit_test.cpp
+++ b/libc/test/src/__support/CPP/bit_test.cpp
@@ -228,38 +228,6 @@ TEST(LlvmLibcBitTest, Rotr) {
             rotr<uint64_t>(0x12345678deadbeefULL, -19));
 }
 
-TYPED_TEST(LlvmLibcBitTest, FirstLeadingZero, UnsignedTypesNoBigInt) {
-  EXPECT_EQ(first_leading_zero<T>(cpp::numeric_limits<T>::max()), 0);
-  for (int i = 0U; i != cpp::numeric_limits<T>::digits; ++i)
-    EXPECT_EQ(first_leading_zero<T>(~(T(1) << i)),
-              cpp::numeric_limits<T>::digits - i);
-}
-
-TYPED_TEST(LlvmLibcBitTest, FirstLeadingOne, UnsignedTypesNoBigInt) {
-  EXPECT_EQ(first_leading_one<T>(static_cast<T>(0)), 0);
-  for (int i = 0U; i != cpp::numeric_limits<T>::digits; ++i)
-    EXPECT_EQ(first_leading_one<T>(T(1) << i),
-              cpp::numeric_limits<T>::digits - i);
-}
-
-TYPED_TEST(LlvmLibcBitTest, FirstTrailingZero, UnsignedTypesNoBigInt) {
-  EXPECT_EQ(first_trailing_zero<T>(cpp::numeric_limits<T>::max()), 0);
-  for (int i = 0U; i != cpp::numeric_limits<T>::digits; ++i)
-    EXPECT_EQ(first_trailing_zero<T>(~(T(1) << i)), i + 1);
-}
-
-TYPED_TEST(LlvmLibcBitTest, FirstTrailingOne, UnsignedTypesNoBigInt) {
-  EXPECT_EQ(first_trailing_one<T>(cpp::numeric_limits<T>::max()), 0);
-  for (int i = 0U; i != cpp::numeric_limits<T>::digits; ++i)
-    EXPECT_EQ(first_trailing_one<T>(T(1) << i), i + 1);
-}
-
-TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypesNoBigInt) {
-  EXPECT_EQ(count_zeros(T(0)), cpp::numeric_limits<T>::digits);
-  for (int i = 0; i != cpp::numeric_limits<T>::digits; ++i)
-    EXPECT_EQ(count_zeros<T>(cpp::numeric_limits<T>::max() >> i), i);
-}
-
 TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypesNoBigInt) {
   EXPECT_EQ(popcount(T(0)), 0);
   for (int i = 0; i != cpp::numeric_limits<T>::digits; ++i)
diff --git a/libc/test/src/__support/math_extras_test.cpp b/libc/test/src/__support/math_extras_test.cpp
index ed064363d446..e642248881a4 100644
--- a/libc/test/src/__support/math_extras_test.cpp
+++ b/libc/test/src/__support/math_extras_test.cpp
@@ -13,6 +13,14 @@
 
 namespace LIBC_NAMESPACE {
 
+// TODO: add UInt<128> support.
+using UnsignedTypesNoBigInt = testing::TypeList<
+#if defined(LIBC_TYPES_HAS_INT128)
+    __uint128_t,
+#endif // LIBC_TYPES_HAS_INT128
+    unsigned char, unsigned short, unsigned int, unsigned long,
+    unsigned long long>;
+
 TEST(LlvmLibcBlockMathExtrasTest, mask_trailing_ones) {
   EXPECT_EQ(0_u8, (mask_leading_ones<uint8_t, 0>()));
   EXPECT_EQ(0_u8, (mask_trailing_ones<uint8_t, 0>()));
@@ -61,4 +69,36 @@ TEST(LlvmLibcBlockMathExtrasTest, mask_trailing_ones) {
             (mask_leading_ones<UInt128, 128>()));
 }
 
+TYPED_TEST(LlvmLibcBitTest, FirstLeadingZero, UnsignedTypesNoBigInt) {
+  EXPECT_EQ(first_leading_zero<T>(cpp::numeric_limits<T>::max()), 0);
+  for (int i = 0U; i != cpp::numeric_limits<T>::digits; ++i)
+    EXPECT_EQ(first_leading_zero<T>(~(T(1) << i)),
+              cpp::numeric_limits<T>::digits - i);
+}
+
+TYPED_TEST(LlvmLibcBitTest, FirstLeadingOne, UnsignedTypesNoBigInt) {
+  EXPECT_EQ(first_leading_one<T>(static_cast<T>(0)), 0);
+  for (int i = 0U; i != cpp::numeric_limits<T>::digits; ++i)
+    EXPECT_EQ(first_leading_one<T>(T(1) << i),
+              cpp::numeric_limits<T>::digits - i);
+}
+
+TYPED_TEST(LlvmLibcBitTest, FirstTrailingZero, UnsignedTypesNoBigInt) {
+  EXPECT_EQ(first_trailing_zero<T>(cpp::numeric_limits<T>::max()), 0);
+  for (int i = 0U; i != cpp::numeric_limits<T>::digits; ++i)
+    EXPECT_EQ(first_trailing_zero<T>(~(T(1) << i)), i + 1);
+}
+
+TYPED_TEST(LlvmLibcBitTest, FirstTrailingOne, UnsignedTypesNoBigInt) {
+  EXPECT_EQ(first_trailing_one<T>(cpp::numeric_limits<T>::max()), 0);
+  for (int i = 0U; i != cpp::numeric_limits<T>::digits; ++i)
+    EXPECT_EQ(first_trailing_one<T>(T(1) << i), i + 1);
+}
+
+TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypesNoBigInt) {
+  EXPECT_EQ(count_zeros(T(0)), cpp::numeric_limits<T>::digits);
+  for (int i = 0; i != cpp::numeric_limits<T>::digits; ++i)
+    EXPECT_EQ(count_zeros<T>(cpp::numeric_limits<T>::max() >> i), i);
+}
+
 } // namespace LIBC_NAMESPACE
-- 
cgit v1.2.3


From 87dc068280aaddc98acb7865ae3df1d248f4a170 Mon Sep 17 00:00:00 2001
From: Jason Molenda <jmolenda@apple.com>
Date: Tue, 12 Mar 2024 09:03:28 -0700
Subject: [lldb] [debugserver] Handle interrupted reads correctly (#84872)

The first half of this patch is a long-standing annoyance, if I attach
to debugserver with lldb while it is waiting for an lldb connection, the
syscall is interrupted and it doesn't retry, debugserver exits
immediately.

The second half is a request from another tool that is communicating
with debugserver, that we retry reads on our sockets in the same way. I
haven't dug in to the details of how they're communicating that this is
necessary, but the best I've been able to find reading the POSIX API
docs, this is fine.

rdar://117113298
---
 lldb/tools/debugserver/source/RNBSocket.cpp | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/lldb/tools/debugserver/source/RNBSocket.cpp b/lldb/tools/debugserver/source/RNBSocket.cpp
index 1282ea221625..fc55dbf2e1f1 100644
--- a/lldb/tools/debugserver/source/RNBSocket.cpp
+++ b/lldb/tools/debugserver/source/RNBSocket.cpp
@@ -120,8 +120,13 @@ rnb_err_t RNBSocket::Listen(const char *listen_host, uint16_t port,
   while (!accept_connection) {
 
     struct kevent event_list[4];
-    int num_events =
-        kevent(queue_id, events.data(), events.size(), event_list, 4, NULL);
+    int num_events;
+    do {
+      errno = 0;
+      num_events =
+          kevent(queue_id, events.data(), events.size(), event_list, 4, NULL);
+    } while (num_events == -1 &&
+             (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR));
 
     if (num_events < 0) {
       err.SetError(errno, DNBError::MachKernel);
@@ -291,7 +296,12 @@ rnb_err_t RNBSocket::Read(std::string &p) {
   // DNBLogThreadedIf(LOG_RNB_COMM, "%8u RNBSocket::%s calling read()",
   // (uint32_t)m_timer.ElapsedMicroSeconds(true), __FUNCTION__);
   DNBError err;
-  ssize_t bytesread = read(m_fd, buf, sizeof(buf));
+  ssize_t bytesread;
+  do {
+    errno = 0;
+    bytesread = read(m_fd, buf, sizeof(buf));
+  } while (bytesread == -1 &&
+           (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR));
   if (bytesread <= 0)
     err.SetError(errno, DNBError::POSIX);
   else
-- 
cgit v1.2.3


From fa1d13590cfd0be77c452cca929bc32efb456627 Mon Sep 17 00:00:00 2001
From: Jake Egan <jake.egan@ibm.com>
Date: Tue, 12 Mar 2024 12:03:42 -0400
Subject: [AIX][tests] Disable failing tests on AIX

These new tests are failing on the AIX bot because the -I option isn't supported.

Disable these tests for now until they can be fixed.
---
 llvm/test/CodeGen/AMDGPU/lds-run-twice-absolute-md.ll | 2 ++
 llvm/test/CodeGen/AMDGPU/lds-run-twice.ll             | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/lds-run-twice-absolute-md.ll b/llvm/test/CodeGen/AMDGPU/lds-run-twice-absolute-md.ll
index 52b44eea35c8..51e10d903797 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-run-twice-absolute-md.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-run-twice-absolute-md.ll
@@ -1,3 +1,5 @@
+; XFAIL: target={{.*}}-aix{{.*}}
+
 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds %s -o %t.ll
 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds %t.ll -o %t.second.ll
 ; RUN: diff -ub %t.ll %t.second.ll -I ".*ModuleID.*"
diff --git a/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll b/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll
index b830ccb944a2..e121f0da327d 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll
@@ -1,3 +1,5 @@
+; XFAIL: target={{.*}}-aix{{.*}}
+
 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds %s -o %t.ll
 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds %t.ll -o %t.second.ll
 ; RUN: diff -ub %t.ll %t.second.ll -I ".*ModuleID.*"
-- 
cgit v1.2.3


From 65eea3e5dc907c3059c57ab4d16770522c5b9fb0 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 12 Mar 2024 17:26:39 +0100
Subject: [libc++][TZDB] Fixes parsing interleaved rules. (#84808)

Typically the rules in the database are contiguous, but that is not a
requirement. This fixes the case when they are not.

---------

Co-authored-by: Louis Dionne <ldionne.2@gmail.com>
---
 libcxx/src/tzdb.cpp                                | 27 ++++++++++++++++++----
 .../time/time.zone/time.zone.db/rules.pass.cpp     | 24 +++++++++++++++++++
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/libcxx/src/tzdb.cpp b/libcxx/src/tzdb.cpp
index 2bb801e48694..0307f754caab 100644
--- a/libcxx/src/tzdb.cpp
+++ b/libcxx/src/tzdb.cpp
@@ -511,14 +511,33 @@ static string __parse_version(istream& __input) {
   return chrono::__parse_string(__input);
 }
 
+[[nodiscard]]
+static __tz::__rule& __create_entry(__tz::__rules_storage_type& __rules, const string& __name) {
+  auto __result = [&]() -> __tz::__rule& {
+    auto& __rule = __rules.emplace_back(__name, vector<__tz::__rule>{});
+    return __rule.second.emplace_back();
+  };
+
+  if (__rules.empty())
+    return __result();
+
+  // Typically rules are in contiguous order in the database.
+  // But there are exceptions, some rules are interleaved.
+  if (__rules.back().first == __name)
+    return __rules.back().second.emplace_back();
+
+  if (auto __it = ranges::find(__rules, __name, [](const auto& __r) { return __r.first; });
+      __it != ranges::end(__rules))
+    return __it->second.emplace_back();
+
+  return __result();
+}
+
 static void __parse_rule(tzdb& __tzdb, __tz::__rules_storage_type& __rules, istream& __input) {
   chrono::__skip_mandatory_whitespace(__input);
   string __name = chrono::__parse_string(__input);
 
-  if (__rules.empty() || __rules.back().first != __name)
-    __rules.emplace_back(__name, vector<__tz::__rule>{});
-
-  __tz::__rule& __rule = __rules.back().second.emplace_back();
+  __tz::__rule& __rule = __create_entry(__rules, __name);
 
   chrono::__skip_mandatory_whitespace(__input);
   __rule.__from = chrono::__parse_year(__input);
diff --git a/libcxx/test/libcxx/time/time.zone/time.zone.db/rules.pass.cpp b/libcxx/test/libcxx/time/time.zone/time.zone.db/rules.pass.cpp
index 5ae2ed1e91eb..4814f4aad87f 100644
--- a/libcxx/test/libcxx/time/time.zone/time.zone.db/rules.pass.cpp
+++ b/libcxx/test/libcxx/time/time.zone/time.zone.db/rules.pass.cpp
@@ -550,6 +550,29 @@ R a 0 1 - Ja Su>=31 1w 2s abc
   assert(result.rules[0].second[2].__letters == "abc");
 }
 
+static void test_mixed_order() {
+  // This is a part of the real database. The interesting part is that the
+  // rules NZ and Chatham are interleaved. Make sure the parse algorithm
+  // handles this correctly.
+  parse_result result{
+      R"(
+# Since 1957 Chatham has been 45 minutes ahead of NZ, but until 2018a
+# there was no documented single notation for the date and time of this
+# transition.  Duplicate the Rule lines for now, to give the 2018a change
+# time to percolate out.
+Rule NZ  1974    only    -   Nov Sun>=1  2:00s   1:00    D
+Rule Chatham 1974    only    -   Nov Sun>=1  2:45s   1:00    -
+Rule NZ  1975    only    -   Feb lastSun 2:00s   0   S
+Rule Chatham 1975    only    -   Feb lastSun 2:45s   0   -
+Rule NZ  1975    1988    -   Oct lastSun 2:00s   1:00    D
+Rule Chatham 1975    1988    -   Oct lastSun 2:45s   1:00    -
+)"};
+
+  assert(result.rules.size() == 2);
+  assert(result.rules[0].second.size() == 3);
+  assert(result.rules[1].second.size() == 3);
+}
+
 int main(int, const char**) {
   test_invalid();
   test_name();
@@ -560,6 +583,7 @@ int main(int, const char**) {
   test_at();
   test_save();
   test_letter();
+  test_mixed_order();
 
   return 0;
 }
-- 
cgit v1.2.3


From af21659c8c5c1d16b9bc5e745aaaf49b322f64d7 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 12 Mar 2024 17:28:15 +0100
Subject: [libc++][CI] Installs tzdata package in Docker. (#84643)

This allows testing the time zone information in the CI. This is needed
to let https://github.com/llvm/llvm-project/pull/82108 pass the CI.
---
 libcxx/utils/ci/Dockerfile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libcxx/utils/ci/Dockerfile b/libcxx/utils/ci/Dockerfile
index 225de937cc86..178cba415933 100644
--- a/libcxx/utils/ci/Dockerfile
+++ b/libcxx/utils/ci/Dockerfile
@@ -65,6 +65,12 @@ RUN <<EOF
   echo "ALL ALL = (ALL) NOPASSWD: ALL" | tee /etc/sudoers || true
 EOF
 
+# Installing tzdata before other packages avoids the time zone prompts.
+# These prompts seem to ignore DEBIAN_FRONTEND=noninteractive.
+RUN sudo apt-get update \
+    && sudo apt-get install -y \
+        tzdata
+
 RUN sudo apt-get update \
     && sudo apt-get install -y \
         python3 \
-- 
cgit v1.2.3


From fe8cf2fc9090d3d56fa1a6bb4b70bd38277874eb Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs@gmail.com>
Date: Tue, 12 Mar 2024 17:31:22 +0100
Subject: [clang][ASTMatchers] Fix forEachArgumentWithParam* for deducing
 "this" operator calls (#84887)

This is a follow-up commit of #84446.
In this patch, I demonstrate that `forEachArgumentWithParam` and
`forEachArgumentWithParamType` did not correctly handle the presence of
the explicit object parameter for operator calls.

Prior to this patch, the matcher would skip the first (and only)
argument of the operator call if the explicit object param was used.

Note that I had to move the definition of
`isExplicitObjectMemberFunction`, to be declared before the matcher I
fix to be visible.

I also had to do some gymnastics for passing the language standard
version command-line flags to the invocation as
`matchAndVerifyResultTrue` wasn't really considered for non-c++11 code.
See the that it always prepends `-std=gnu++11` to the command-line
arguments. I workarounded it by accepting extra args, which get
appended, thus possibly overriding the hardcoded arguments.

I'm not sure if this qualifies for backporting to clang-18 (probably not
because its not a crash, but a semantic problem), but I figure it might
be useful for some vendors (like us).
But we are also happy to cherry-pick this fix to downstream. Let me know
if you want this to be backported or not.

CPP-5074
---
 clang/docs/ReleaseNotes.rst                        |  2 +
 clang/include/clang/ASTMatchers/ASTMatchers.h      | 59 ++++++++++----------
 clang/unittests/ASTMatchers/ASTMatchersTest.h      | 36 ++++++++-----
 .../ASTMatchers/ASTMatchersTraversalTest.cpp       | 63 ++++++++++++++++++++++
 4 files changed, 119 insertions(+), 41 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 6c30af304d98..2842b63197ff 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -450,6 +450,8 @@ AST Matchers
 
 - ``isInStdNamespace`` now supports Decl declared with ``extern "C++"``.
 - Add ``isExplicitObjectMemberFunction``.
+- Fixed ``forEachArgumentWithParam`` and ``forEachArgumentWithParamType`` to
+  not skip the explicit object parameter for operator calls.
 
 clang-format
 ------------
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index 96dbcdc344e1..2f71053d030f 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -5032,6 +5032,25 @@ AST_POLYMORPHIC_MATCHER_P2(hasParameter,
           && InnerMatcher.matches(*Node.parameters()[N], Finder, Builder));
 }
 
+/// Matches if the given method declaration declares a member function with an
+/// explicit object parameter.
+///
+/// Given
+/// \code
+/// struct A {
+///  int operator-(this A, int);
+///  void fun(this A &&self);
+///  static int operator()(int);
+///  int operator+(int);
+/// };
+/// \endcode
+///
+/// cxxMethodDecl(isExplicitObjectMemberFunction()) matches the first two
+/// methods but not the last two.
+AST_MATCHER(CXXMethodDecl, isExplicitObjectMemberFunction) {
+  return Node.isExplicitObjectMemberFunction();
+}
+
 /// Matches all arguments and their respective ParmVarDecl.
 ///
 /// Given
@@ -5060,10 +5079,12 @@ AST_POLYMORPHIC_MATCHER_P2(forEachArgumentWithParam,
   // argument of the method which should not be matched against a parameter, so
   // we skip over it here.
   BoundNodesTreeBuilder Matches;
-  unsigned ArgIndex = cxxOperatorCallExpr(callee(cxxMethodDecl()))
-                              .matches(Node, Finder, &Matches)
-                          ? 1
-                          : 0;
+  unsigned ArgIndex =
+      cxxOperatorCallExpr(
+          callee(cxxMethodDecl(unless(isExplicitObjectMemberFunction()))))
+              .matches(Node, Finder, &Matches)
+          ? 1
+          : 0;
   int ParamIndex = 0;
   bool Matched = false;
   for (; ArgIndex < Node.getNumArgs(); ++ArgIndex) {
@@ -5121,11 +5142,12 @@ AST_POLYMORPHIC_MATCHER_P2(forEachArgumentWithParamType,
   // argument of the method which should not be matched against a parameter, so
   // we skip over it here.
   BoundNodesTreeBuilder Matches;
-  unsigned ArgIndex = cxxOperatorCallExpr(callee(cxxMethodDecl()))
-                              .matches(Node, Finder, &Matches)
-                          ? 1
-                          : 0;
-
+  unsigned ArgIndex =
+      cxxOperatorCallExpr(
+          callee(cxxMethodDecl(unless(isExplicitObjectMemberFunction()))))
+              .matches(Node, Finder, &Matches)
+          ? 1
+          : 0;
   const FunctionProtoType *FProto = nullptr;
 
   if (const auto *Call = dyn_cast<CallExpr>(&Node)) {
@@ -6366,25 +6388,6 @@ AST_MATCHER(CXXMethodDecl, isConst) {
   return Node.isConst();
 }
 
-/// Matches if the given method declaration declares a member function with an
-/// explicit object parameter.
-///
-/// Given
-/// \code
-/// struct A {
-///  int operator-(this A, int);
-///  void fun(this A &&self);
-///  static int operator()(int);
-///  int operator+(int);
-/// };
-/// \endcode
-///
-/// cxxMethodDecl(isExplicitObjectMemberFunction()) matches the first two
-/// methods but not the last two.
-AST_MATCHER(CXXMethodDecl, isExplicitObjectMemberFunction) {
-  return Node.isExplicitObjectMemberFunction();
-}
-
 /// Matches if the given method declaration declares a copy assignment
 /// operator.
 ///
diff --git a/clang/unittests/ASTMatchers/ASTMatchersTest.h b/clang/unittests/ASTMatchers/ASTMatchersTest.h
index 1ed1b5958a8b..e98129953157 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersTest.h
+++ b/clang/unittests/ASTMatchers/ASTMatchersTest.h
@@ -293,7 +293,8 @@ testing::AssertionResult notMatchesWithOpenMP51(const Twine &Code,
 template <typename T>
 testing::AssertionResult matchAndVerifyResultConditionally(
     const Twine &Code, const T &AMatcher,
-    std::unique_ptr<BoundNodesCallback> FindResultVerifier, bool ExpectResult) {
+    std::unique_ptr<BoundNodesCallback> FindResultVerifier, bool ExpectResult,
+    ArrayRef<std::string> Args = {}, StringRef Filename = "input.cc") {
   bool VerifiedResult = false;
   MatchFinder Finder;
   VerifyMatch VerifyVerifiedResult(std::move(FindResultVerifier),
@@ -304,9 +305,13 @@ testing::AssertionResult matchAndVerifyResultConditionally(
   // Some tests use typeof, which is a gnu extension.  Using an explicit
   // unknown-unknown triple is good for a large speedup, because it lets us
   // avoid constructing a full system triple.
-  std::vector<std::string> Args = {"-std=gnu++11", "-target",
-                                   "i386-unknown-unknown"};
-  if (!runToolOnCodeWithArgs(Factory->create(), Code, Args)) {
+  std::vector<std::string> CompileArgs = {"-std=gnu++11", "-target",
+                                          "i386-unknown-unknown"};
+  // Append additional arguments at the end to allow overriding the default
+  // choices that we made above.
+  llvm::copy(Args, std::back_inserter(CompileArgs));
+
+  if (!runToolOnCodeWithArgs(Factory->create(), Code, CompileArgs, Filename)) {
     return testing::AssertionFailure() << "Parsing error in \"" << Code << "\"";
   }
   if (!VerifiedResult && ExpectResult) {
@@ -319,8 +324,8 @@ testing::AssertionResult matchAndVerifyResultConditionally(
 
   VerifiedResult = false;
   SmallString<256> Buffer;
-  std::unique_ptr<ASTUnit> AST(
-      buildASTFromCodeWithArgs(Code.toStringRef(Buffer), Args));
+  std::unique_ptr<ASTUnit> AST(buildASTFromCodeWithArgs(
+      Code.toStringRef(Buffer), CompileArgs, Filename));
   if (!AST.get())
     return testing::AssertionFailure()
            << "Parsing error in \"" << Code << "\" while building AST";
@@ -339,19 +344,24 @@ testing::AssertionResult matchAndVerifyResultConditionally(
 // FIXME: Find better names for these functions (or document what they
 // do more precisely).
 template <typename T>
-testing::AssertionResult matchAndVerifyResultTrue(
-    const Twine &Code, const T &AMatcher,
-    std::unique_ptr<BoundNodesCallback> FindResultVerifier) {
-  return matchAndVerifyResultConditionally(Code, AMatcher,
-                                           std::move(FindResultVerifier), true);
+testing::AssertionResult
+matchAndVerifyResultTrue(const Twine &Code, const T &AMatcher,
+                         std::unique_ptr<BoundNodesCallback> FindResultVerifier,
+                         ArrayRef<std::string> Args = {},
+                         StringRef Filename = "input.cc") {
+  return matchAndVerifyResultConditionally(
+      Code, AMatcher, std::move(FindResultVerifier),
+      /*ExpectResult=*/true, Args, Filename);
 }
 
 template <typename T>
 testing::AssertionResult matchAndVerifyResultFalse(
     const Twine &Code, const T &AMatcher,
-    std::unique_ptr<BoundNodesCallback> FindResultVerifier) {
+    std::unique_ptr<BoundNodesCallback> FindResultVerifier,
+    ArrayRef<std::string> Args = {}, StringRef Filename = "input.cc") {
   return matchAndVerifyResultConditionally(
-      Code, AMatcher, std::move(FindResultVerifier), false);
+      Code, AMatcher, std::move(FindResultVerifier),
+      /*ExpectResult=*/false, Args, Filename);
 }
 
 // Implements a run method that returns whether BoundNodes contains a
diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
index 6911d7600a71..f198dc71eb83 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
@@ -985,6 +985,38 @@ TEST(ForEachArgumentWithParam, HandlesBoundNodesForNonMatches) {
     std::make_unique<VerifyIdIsBoundTo<VarDecl>>("v", 4)));
 }
 
+TEST_P(ASTMatchersTest,
+       ForEachArgumentWithParamMatchesExplicitObjectParamOnOperatorCalls) {
+  if (!GetParam().isCXX23OrLater()) {
+    return;
+  }
+
+  auto DeclRef = declRefExpr(to(varDecl().bind("declOfArg"))).bind("arg");
+  auto SelfParam = parmVarDecl().bind("param");
+  StatementMatcher CallExpr =
+      callExpr(forEachArgumentWithParam(DeclRef, SelfParam));
+
+  StringRef S = R"cpp(
+  struct A {
+    int operator()(this const A &self);
+  };
+  A obj;
+  int global = obj();
+  )cpp";
+
+  auto Args = GetParam().getCommandLineArgs();
+  auto Filename = getFilenameForTesting(GetParam().Language);
+
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      S, CallExpr,
+      std::make_unique<VerifyIdIsBoundTo<ParmVarDecl>>("param", "self"), Args,
+      Filename));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      S, CallExpr,
+      std::make_unique<VerifyIdIsBoundTo<VarDecl>>("declOfArg", "obj"), Args,
+      Filename));
+}
+
 TEST(ForEachArgumentWithParamType, ReportsNoFalsePositives) {
   StatementMatcher ArgumentY =
       declRefExpr(to(varDecl(hasName("y")))).bind("arg");
@@ -1168,6 +1200,37 @@ TEST(ForEachArgumentWithParamType, MatchesVariadicFunctionPtrCalls) {
       S, CallExpr, std::make_unique<VerifyIdIsBoundTo<DeclRefExpr>>("arg")));
 }
 
+TEST_P(ASTMatchersTest,
+       ForEachArgumentWithParamTypeMatchesExplicitObjectParamOnOperatorCalls) {
+  if (!GetParam().isCXX23OrLater()) {
+    return;
+  }
+
+  auto DeclRef = declRefExpr(to(varDecl().bind("declOfArg"))).bind("arg");
+  auto SelfTy = qualType(asString("const A &")).bind("selfType");
+  StatementMatcher CallExpr =
+      callExpr(forEachArgumentWithParamType(DeclRef, SelfTy));
+
+  StringRef S = R"cpp(
+  struct A {
+    int operator()(this const A &self);
+  };
+  A obj;
+  int global = obj();
+  )cpp";
+
+  auto Args = GetParam().getCommandLineArgs();
+  auto Filename = getFilenameForTesting(GetParam().Language);
+
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      S, CallExpr, std::make_unique<VerifyIdIsBoundTo<QualType>>("selfType"),
+      Args, Filename));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      S, CallExpr,
+      std::make_unique<VerifyIdIsBoundTo<VarDecl>>("declOfArg", "obj"), Args,
+      Filename));
+}
+
 TEST(QualType, hasCanonicalType) {
   EXPECT_TRUE(notMatches("typedef int &int_ref;"
                            "int a;"
-- 
cgit v1.2.3


From c8cc7903b373589cc85271987980ae277145df7c Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy@outlook.com>
Date: Wed, 13 Mar 2024 00:33:50 +0800
Subject: [SelectionDAG] Replace some basic patterns in visitADDLike with
 SDPatternMatch (#84759)

Resolves #84745.

Based on SDPatternMatch introduced by #78654, this patch replaces some
of basic patterns in `visitADDLike` with corresponding patterns in
SDPatternMatch.

This patch only replaces original folds, instead of introducing new ones.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 55 ++++++++++++---------------
 1 file changed, 25 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5476ef879714..735cec8ecc06 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -38,6 +38,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -79,6 +80,7 @@
 #include "MatchContext.h"
 
 using namespace llvm;
+using namespace llvm::SDPatternMatch;
 
 #define DEBUG_TYPE "dagcombine"
 
@@ -2697,52 +2699,45 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
             reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
       return SD;
   }
+
+  SDValue A, B, C;
+
   // fold ((0-A) + B) -> B-A
-  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
-    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+  if (sd_match(N0, m_Sub(m_Zero(), m_Value(A))))
+    return DAG.getNode(ISD::SUB, DL, VT, N1, A);
 
   // fold (A + (0-B)) -> A-B
-  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
-    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
+  if (sd_match(N1, m_Sub(m_Zero(), m_Value(B))))
+    return DAG.getNode(ISD::SUB, DL, VT, N0, B);
 
   // fold (A+(B-A)) -> B
-  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
-    return N1.getOperand(0);
+  if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
+    return B;
 
   // fold ((B-A)+A) -> B
-  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
-    return N0.getOperand(0);
+  if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
+    return B;
 
   // fold ((A-B)+(C-A)) -> (C-B)
-  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
-      N0.getOperand(0) == N1.getOperand(1))
-    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
-                       N0.getOperand(1));
+  if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
+      sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
+    return DAG.getNode(ISD::SUB, DL, VT, C, B);
 
   // fold ((A-B)+(B-C)) -> (A-C)
-  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
-      N0.getOperand(1) == N1.getOperand(0))
-    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
-                       N1.getOperand(1));
+  if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
+      sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
+    return DAG.getNode(ISD::SUB, DL, VT, A, C);
 
   // fold (A+(B-(A+C))) to (B-C)
-  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
-      N0 == N1.getOperand(1).getOperand(0))
-    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
-                       N1.getOperand(1).getOperand(1));
-
   // fold (A+(B-(C+A))) to (B-C)
-  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
-      N0 == N1.getOperand(1).getOperand(1))
-    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
-                       N1.getOperand(1).getOperand(0));
+  if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
+    return DAG.getNode(ISD::SUB, DL, VT, B, C);
 
   // fold (A+((B-A)+or-C)) to (B+or-C)
-  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
-      N1.getOperand(0).getOpcode() == ISD::SUB &&
-      N0 == N1.getOperand(0).getOperand(1))
-    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
-                       N1.getOperand(1));
+  if (sd_match(N1,
+               m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
+                       m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
+    return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
 
   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
-- 
cgit v1.2.3


From bd72ebd8d1ec3e97ca666623aa628d653a1d54a5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 12 Mar 2024 22:05:47 +0530
Subject: AMDGPU: Add some more mfma hazard recognizer tests (#84727)

---
 llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir | 454 ++++++++++++++++++++++++
 1 file changed, 454 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
index 4d307a444b19..a98b02d792d9 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
@@ -2028,3 +2028,457 @@ body:             |
     $agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
     BUFFER_STORE_DWORDX2_OFFEN_exact $vgpr2_vgpr3, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
 ...
+
+...
+# 2 pass source
+# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc
+body:             |
+  bb.0:
+
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr2_vgpr3_vgpr4_vgpr5, 1, 2, 3, implicit $mode, implicit $exec
+
+...
+
+...
+# 2 pass source
+# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srca
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srca
+body:             |
+  bb.0:
+
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11, 1, 2, 3, implicit $mode, implicit $exec
+
+...
+
+...
+# 2 pass source
+# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcb
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcb
+body:             |
+  bb.0:
+
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr6_vgpr7, $vgpr2_vgpr3, $vgpr8_vgpr9_vgpr10_vgpr11, 1, 2, 3, implicit $mode, implicit $exec
+
+...
+
+...
+# 4 pass source
+# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc
+body:             |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr2_vgpr3_vgpr4_vgpr5, 1, 2, 3, implicit $mode, implicit $exec
+
+...
+
+...
+# 4 pass source
+# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srca
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srca
+body:             |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr2_vgpr3, $vgpr10_vgpr11, $vgpr6_vgpr7_vgpr8_vgpr9, 1, 2, 3, implicit $mode, implicit $exec
+
+...
+
+...
+# 4 pass source
+# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcb
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcb
+body:             |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr10_vgpr11, $vgpr2_vgpr3, $vgpr6_vgpr7_vgpr8_vgpr9, 1, 2, 3, implicit $mode, implicit $exec
+
+...
+
+...
+# 2 pass source
+# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc
+body:             |
+  bb.0:
+
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr6, $vgpr8, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
+
+...
+
+...
+# 2 pass source
+# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srca
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srca
+body:             |
+  bb.0:
+
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr8, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec
+
+...
+
+...
+# 2 pass source
+# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcb
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcb
+body:             |
+  bb.0:
+
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr1, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec
+
+...
+
+...
+# 4 pass source
+# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc
+body:             |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr9, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
+
+...
+
+...
+# 4 pass source
+# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srca
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srca
+body:             |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr8, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec
+
+...
+
+...
+# 4 pass source
+# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcb
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcb
+body:             |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr1, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec
+
+...
+
+...
+# 8 pass source
+# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
+body:             |
+  bb.0:
+    renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec
+
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+...
+
+...
+# 8 pass source
+# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
+body:             |
+  bb.0:
+    renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec
+
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr0, $vgpr33, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec
+...
+
+...
+# 8 pass source
+# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
+body:             |
+  bb.0:
+    renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec
+
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr33, $vgpr1, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec
+...
+
+...
+# 16 pass source
+# GCN-LABEL: name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name:            xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
+body:             |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X2F32_vgprcd_e64 killed $vgpr32, killed $vgpr33, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 1, 2, 3, implicit $mode, implicit $exec
+
+...
+
+...
+# 16 pass source
+# GCN-LABEL: name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name:            xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
+body:             |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X2F32_vgprcd_e64 killed $vgpr0, killed $vgpr33, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 1, 2, 3, implicit $mode, implicit $exec
+
+...
+
+...
+# 16 pass source
+# GCN-LABEL: name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name:            xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
+body:             |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X2F32_vgprcd_e64 killed $vgpr33, killed $vgpr0, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 1, 2, 3, implicit $mode, implicit $exec
+
+...
+
+...
+# 8 pass source
+# GCN-LABEL: name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: V_MFMA
+name:            nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
+body:             |
+  bb.0:
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+...
+
+...
+# 8 pass source
+# GCN-LABEL: name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: V_MFMA
+name:            nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
+body:             |
+  bb.0:
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr3, $vgpr19, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec
+...
+
+...
+# 8 pass source
+# GCN-LABEL: name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: V_MFMA
+name:            nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
+body:             |
+  bb.0:
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr19, $vgpr3, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec
+...
+...
+# 8 pass source
+# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc
+body:             |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr18_vgpr19, killed $vgpr20_vgpr21, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+...
+
+...
+# 8 pass source
+# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srca
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srca
+body:             |
+  bb.0:
+    renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr2_vgpr3, killed $vgpr36_vgpr37, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+...
+
+...
+# 8 pass source
+# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcb
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name:            xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcb
+body:             |
+  bb.0:
+    renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr36_vgpr37, killed $vgpr2_vgpr3, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+...
+
+...
+# 16 pass source
+# GCN-LABEL: name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcc
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name:            xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcc
+body:             |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec
+
+...
+
+...
+# 16 pass source
+# GCN-LABEL: name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srca
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name:            xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srca
+body:             |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr2_vgpr3, $vgpr128_vgpr129, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 0, 0, 0, implicit $mode, implicit $exec
+
+
+...
+
+...
+# 16 pass source
+# GCN-LABEL: name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcb
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name:            xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcb
+body:             |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr128_vgpr129, $vgpr2_vgpr3, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 0, 0, 0, implicit $mode, implicit $exec
+
+...
+
+...
+# 2 pass source
+# GCN-LABEL: name: xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_SMFMAC_
+name:            xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc
+body:             |
+  bb.0:
+
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X4F16_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $agpr0_agpr1_agpr2_agpr3, 1, 2, 3, implicit $mode, implicit $exec
+    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
+
+...
+
+...
+# GCN-LABEL: name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_SMFMAC_
+name:            xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
+body:             |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
+
+...
+
+...
+# GCN-LABEL: name: xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_SMFMAC_
+name:            xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc
+body:             |
+  bb.0:
+    renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
+...
+
+...
+# GCN-LABEL: name: xdl_16pass_mfma_write_agpr_smfmac_read_overlap_srcc
+# GCN:      V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_SMFMAC_
+name:            xdl_16pass_mfma_write_agpr_smfmac_read_overlap_srcc
+body:             |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X4F16_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
+...
-- 
cgit v1.2.3


From e09761944cea4aeafadd055b9510ef9f0e9a7338 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 12 Mar 2024 18:04:14 +0100
Subject: [libc++] Improves UB handling in ios_base destructor. (#76525)

Destroying an ios_base object before it is properly initialized is
undefined behavior. Unlike typical C++ classes the initialization is not
done in the constructor, but in a dedicated init function. Due to
virtual inheritance of the basic_ios object in ostream and friends this
undefined behaviour can be triggered when inheriting from classes that
can throw in their constructor and inheriting from ostream.

Use the __loc_ member of ios_base as sentinel to detect whether the
object has or has not been initialized.

Addresses https://github.com/llvm/llvm-project/issues/57964
---
 libcxx/include/ios                                 | 12 +++-
 libcxx/src/ios.cpp                                 |  4 ++
 .../ios.base.cons/dtor.uninitialized.pass.cpp      | 80 ++++++++++++++++++++++
 3 files changed, 94 insertions(+), 2 deletions(-)
 create mode 100644 libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp

diff --git a/libcxx/include/ios b/libcxx/include/ios
index 4b1306fc2ad8..00c1d5c2d4bc 100644
--- a/libcxx/include/ios
+++ b/libcxx/include/ios
@@ -359,7 +359,13 @@ public:
   }
 
 protected:
-  _LIBCPP_HIDE_FROM_ABI ios_base() { // purposefully does no initialization
+  _LIBCPP_HIDE_FROM_ABI ios_base() : __loc_(nullptr) {
+    // Purposefully does no initialization
+    //
+    // Except for the locale, this is a sentinel to avoid destroying
+    // an uninitialized object. See
+    // test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp
+    // for the details.
   }
 
   void init(void* __sb);
@@ -571,7 +577,9 @@ public:
   _LIBCPP_HIDE_FROM_ABI char_type widen(char __c) const;
 
 protected:
-  _LIBCPP_HIDE_FROM_ABI basic_ios() { // purposefully does no initialization
+  _LIBCPP_HIDE_FROM_ABI basic_ios() {
+    // purposefully does no initialization
+    // since the destructor does nothing this does not have ios_base issues.
   }
   _LIBCPP_HIDE_FROM_ABI void init(basic_streambuf<char_type, traits_type>* __sb);
 
diff --git a/libcxx/src/ios.cpp b/libcxx/src/ios.cpp
index d58827fa1255..a727855c4655 100644
--- a/libcxx/src/ios.cpp
+++ b/libcxx/src/ios.cpp
@@ -195,6 +195,10 @@ void ios_base::register_callback(event_callback fn, int index) {
 }
 
 ios_base::~ios_base() {
+  // Avoid UB when not properly initialized. See ios_base::ios_base for
+  // more information.
+  if (!__loc_)
+    return;
   __call_callbacks(erase_event);
   locale& loc_storage = *reinterpret_cast<locale*>(&__loc_);
   loc_storage.~locale();
diff --git a/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp b/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp
new file mode 100644
index 000000000000..ea42203d0544
--- /dev/null
+++ b/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp
@@ -0,0 +1,80 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: no-exceptions
+
+// The fix for issue 57964 requires an updated dylib due to explicit
+// instantiations. That means Apple backdeployment targets remain broken.
+// UNSUPPORTED: using-built-library-before-llvm-19
+
+// <ios>
+
+// class ios_base
+
+// ~ios_base()
+//
+// Destroying a constructed ios_base object that has not been
+// initialized by basic_ios::init is undefined behaviour. This can
+// happen in practice, make sure the undefined behaviour is handled
+// gracefully.
+//
+//
+// [ios.base.cons]/1
+//
+// ios_base();
+// Effects: Each ios_base member has an indeterminate value after construction.
+// The object's members shall be initialized by calling basic_ios::init before
+// the object's first use or before it is destroyed, whichever comes first;
+// otherwise the behavior is undefined.
+//
+// [basic.ios.cons]/2
+//
+// basic_ios();
+// Effects: Leaves its member objects uninitialized.  The object shall be
+// initialized by calling basic_ios::init before its first use or before it is
+// destroyed, whichever comes first; otherwise the behavior is undefined.
+//
+// ostream and friends have a basic_ios virtual base.
+// [class.base.init]/13
+// In a non-delegating constructor, initialization proceeds in the
+// following order:
+// - First, and only for the constructor of the most derived class
+//   ([intro.object]), virtual base classes are initialized ...
+//
+// So in this example
+// struct Foo : AlwaysThrows, std::ostream {
+//    Foo() : AlwaysThrows{}, std::ostream{nullptr} {}
+// };
+//
+// Here
+// - the ios_base object is constructed
+// - the AlwaysThrows object is constructed and throws an exception
+// - the AlwaysThrows object is destrodyed
+// - the ios_base object is destroyed
+//
+// The ios_base object is destroyed before it has been initialized and runs
+// into undefined behavior. By using __loc_ as a sentinel we can avoid
+// accessing uninitialized memory in the destructor.
+
+#include <ostream>
+
+struct AlwaysThrows {
+  AlwaysThrows() { throw 1; }
+};
+
+struct Foo : AlwaysThrows, std::ostream {
+  Foo() : AlwaysThrows(), std::ostream(nullptr) {}
+};
+
+int main(int, char**) {
+  try {
+    Foo foo;
+  } catch (...) {
+  };
+  return 0;
+}
-- 
cgit v1.2.3


From 683a9ac803a56f6dda9b783a6e2d6d92a5d0626c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 12 Mar 2024 17:10:16 +0000
Subject: [X86] combineVectorPack - use APInt::truncSSat for PACKSS constant
 folding. NFC.

Unfortunately PACKUS can't use APInt::truncUSat
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b4d0421c14c0..72b45d462dfe 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47630,16 +47630,12 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
           // PACKSS: Truncate signed value with signed saturation.
           // Source values less than dst minint are saturated to minint.
           // Source values greater than dst maxint are saturated to maxint.
-          if (Val.isSignedIntN(DstBitsPerElt))
-            Val = Val.trunc(DstBitsPerElt);
-          else if (Val.isNegative())
-            Val = APInt::getSignedMinValue(DstBitsPerElt);
-          else
-            Val = APInt::getSignedMaxValue(DstBitsPerElt);
+          Val = Val.truncSSat(DstBitsPerElt);
         } else {
           // PACKUS: Truncate signed value with unsigned saturation.
           // Source values less than zero are saturated to zero.
           // Source values greater than dst maxuint are saturated to maxuint.
+          // NOTE: This is different from APInt::truncUSat.
           if (Val.isIntN(DstBitsPerElt))
             Val = Val.trunc(DstBitsPerElt);
           else if (Val.isNegative())
-- 
cgit v1.2.3


From 7bee91fadf8db90f71b458aaff4de0efa7dc23a0 Mon Sep 17 00:00:00 2001
From: "Diego A. Estrada Rivera" <diego.estrada1@proton.me>
Date: Tue, 12 Mar 2024 13:21:31 -0400
Subject: [analyzer][NFC] Turn NodeBuilderContext into a class (#84638)

From issue #73088. I changed `NodeBuilderContext` into a class.
Additionally, there were some other mentions of the former being a
struct which I also changed into a class. This is my first time working
with an issue so I will be open to hearing any advice or changes that
need to be done.
---
 clang/include/clang/StaticAnalyzer/Core/CheckerManager.h     |  2 +-
 .../clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h     | 12 +++++++++---
 .../clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h     |  2 +-
 clang/lib/StaticAnalyzer/Core/CoreEngine.cpp                 |  6 +++---
 4 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index a45ba1bc573e..ad25d18f2807 100644
--- a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -49,7 +49,7 @@ class ExplodedNodeSet;
 class ExprEngine;
 struct EvalCallOptions;
 class MemRegion;
-struct NodeBuilderContext;
+class NodeBuilderContext;
 class ObjCMethodCall;
 class RegionAndSymbolInvalidationTraits;
 class SVal;
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
index 8e392421fef9..0ef353bf9731 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
@@ -59,7 +59,7 @@ class CoreEngine {
   friend class ExprEngine;
   friend class IndirectGotoNodeBuilder;
   friend class NodeBuilder;
-  friend struct NodeBuilderContext;
+  friend class NodeBuilderContext;
   friend class SwitchNodeBuilder;
 
 public:
@@ -193,12 +193,12 @@ public:
   DataTag::Factory &getDataTags() { return DataTags; }
 };
 
-// TODO: Turn into a class.
-struct NodeBuilderContext {
+class NodeBuilderContext {
   const CoreEngine &Eng;
   const CFGBlock *Block;
   const LocationContext *LC;
 
+public:
   NodeBuilderContext(const CoreEngine &E, const CFGBlock *B,
                      const LocationContext *L)
       : Eng(E), Block(B), LC(L) {
@@ -208,9 +208,15 @@ struct NodeBuilderContext {
   NodeBuilderContext(const CoreEngine &E, const CFGBlock *B, ExplodedNode *N)
       : NodeBuilderContext(E, B, N->getLocationContext()) {}
 
+  /// Return the CoreEngine associated with this builder.
+  const CoreEngine &getEngine() const { return Eng; }
+
   /// Return the CFGBlock associated with this builder.
   const CFGBlock *getBlock() const { return Block; }
 
+  /// Return the location context associated with this builder.
+  const LocationContext *getLocationContext() const { return LC; }
+
   /// Returns the number of times the current basic block has been
   /// visited on the exploded graph path.
   unsigned blockCount() const {
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
index f7894fb83ce6..859c1497d7e6 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -85,7 +85,7 @@ class ExplodedNodeSet;
 class ExplodedNode;
 class IndirectGotoNodeBuilder;
 class MemRegion;
-struct NodeBuilderContext;
+class NodeBuilderContext;
 class NodeBuilderWithSinks;
 class ProgramState;
 class ProgramStateManager;
diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
index 141d0cb320bf..8605fa149e4f 100644
--- a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
@@ -625,8 +625,8 @@ ExplodedNode* NodeBuilder::generateNodeImpl(const ProgramPoint &Loc,
                                             bool MarkAsSink) {
   HasGeneratedNodes = true;
   bool IsNew;
-  ExplodedNode *N = C.Eng.G.getNode(Loc, State, MarkAsSink, &IsNew);
-  N->addPredecessor(FromN, C.Eng.G);
+  ExplodedNode *N = C.getEngine().G.getNode(Loc, State, MarkAsSink, &IsNew);
+  N->addPredecessor(FromN, C.getEngine().G);
   Frontier.erase(FromN);
 
   if (!IsNew)
@@ -655,7 +655,7 @@ ExplodedNode *BranchNodeBuilder::generateNode(ProgramStateRef State,
   if (!isFeasible(branch))
     return nullptr;
 
-  ProgramPoint Loc = BlockEdge(C.Block, branch ? DstT:DstF,
+  ProgramPoint Loc = BlockEdge(C.getBlock(), branch ? DstT : DstF,
                                NodePred->getLocationContext());
   ExplodedNode *Succ = generateNodeImpl(Loc, State, NodePred);
   return Succ;
-- 
cgit v1.2.3


From 93503aafcdc66837ecf220243aaa530c05c35895 Mon Sep 17 00:00:00 2001
From: Hiroshi Yamauchi <56735936+hjyamauchi@users.noreply.github.com>
Date: Tue, 12 Mar 2024 10:26:44 -0700
Subject: Fix MSVC build issues (#84362)

MSVC fails when there is ambiguity (multiple options) around implicit
type conversion operators.

Make ConstString's conversion operator to string_view explicit to avoid
ambiguity with one to StringRef and remove an unused local variable that
MSVC also fails on.
---
 lldb/include/lldb/Utility/ConstString.h              |  4 ++--
 lldb/source/Core/Mangled.cpp                         | 12 +++++++-----
 lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp |  1 -
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/lldb/include/lldb/Utility/ConstString.h b/lldb/include/lldb/Utility/ConstString.h
index 470a554ca048..f7f7ec7605eb 100644
--- a/lldb/include/lldb/Utility/ConstString.h
+++ b/lldb/include/lldb/Utility/ConstString.h
@@ -168,8 +168,8 @@ public:
   // Implicitly convert \class ConstString instances to \class StringRef.
   operator llvm::StringRef() const { return GetStringRef(); }
 
-  // Implicitly convert \class ConstString instances to \class std::string_view.
-  operator std::string_view() const {
+  // Explicitly convert \class ConstString instances to \class std::string_view.
+  explicit operator std::string_view() const {
     return std::string_view(m_string, GetLength());
   }
 
diff --git a/lldb/source/Core/Mangled.cpp b/lldb/source/Core/Mangled.cpp
index 23ae3913093f..b167c51fdce2 100644
--- a/lldb/source/Core/Mangled.cpp
+++ b/lldb/source/Core/Mangled.cpp
@@ -125,7 +125,7 @@ void Mangled::SetValue(ConstString name) {
 }
 
 // Local helpers for different demangling implementations.
-static char *GetMSVCDemangledStr(std::string_view M) {
+static char *GetMSVCDemangledStr(llvm::StringRef M) {
   char *demangled_cstr = llvm::microsoftDemangle(
       M, nullptr, nullptr,
       llvm::MSDemangleFlags(
@@ -169,27 +169,29 @@ static char *GetItaniumDemangledStr(const char *M) {
   return demangled_cstr;
 }
 
-static char *GetRustV0DemangledStr(std::string_view M) {
+static char *GetRustV0DemangledStr(llvm::StringRef M) {
   char *demangled_cstr = llvm::rustDemangle(M);
 
   if (Log *log = GetLog(LLDBLog::Demangle)) {
     if (demangled_cstr && demangled_cstr[0])
       LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
     else
-      LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", M);
+      LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle",
+               static_cast<std::string_view>(M));
   }
 
   return demangled_cstr;
 }
 
-static char *GetDLangDemangledStr(std::string_view M) {
+static char *GetDLangDemangledStr(llvm::StringRef M) {
   char *demangled_cstr = llvm::dlangDemangle(M);
 
   if (Log *log = GetLog(LLDBLog::Demangle)) {
     if (demangled_cstr && demangled_cstr[0])
       LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
     else
-      LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M);
+      LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle",
+               static_cast<std::string_view>(M));
   }
 
   return demangled_cstr;
diff --git a/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp b/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp
index 6a2ea8c4a41b..f237dd63ab1c 100644
--- a/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp
+++ b/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp
@@ -527,7 +527,6 @@ TEST_F(SymbolFilePDBTests, TestTypedefs) {
   SymbolFilePDB *symfile =
       static_cast<SymbolFilePDB *>(module->GetSymbolFile());
   llvm::pdb::IPDBSession &session = symfile->GetPDBSession();
-  TypeMap results;
 
   const char *TypedefsToCheck[] = {"ClassTypedef", "NSClassTypedef",
                                    "FuncPointerTypedef",
-- 
cgit v1.2.3


From c4e517f59c086eafe2eb61d23197820f05be799c Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86@yahoo.com>
Date: Tue, 12 Mar 2024 10:30:39 -0700
Subject: [AMDGPU] Adding the amdgpu_num_work_groups function attribute
 (#79035)

A new function attribute named amdgpu_num_work_groups is added. This
attribute, which consists of three integers, allows programmers to let
the compiler know the number of workgroups to be launched in each of the
three dimensions and do optimizations based on that information.

---------

Co-authored-by: Jun Wang <jun.wang7@amd.com>
---
 clang/docs/ReleaseNotes.rst                        |   6 +
 clang/include/clang/Basic/Attr.td                  |   7 ++
 clang/include/clang/Basic/AttrDocs.td              |  27 +++++
 clang/include/clang/Sema/Sema.h                    |  10 ++
 clang/lib/CodeGen/Targets/AMDGPU.cpp               |  23 ++++
 clang/lib/Sema/SemaDeclAttr.cpp                    |  62 ++++++++++
 clang/lib/Sema/SemaTemplateInstantiateDecl.cpp     |  29 +++++
 clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu      |  35 ++++++
 clang/test/CodeGenOpenCL/amdgpu-attrs.cl           |  47 ++++++++
 ...pragma-attribute-supported-attributes-list.test |   1 +
 clang/test/SemaCUDA/amdgpu-attrs.cu                | 132 +++++++++++++++++++++
 llvm/docs/AMDGPUUsage.rst                          |  10 ++
 .../Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp    |   8 ++
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp         |   7 +-
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h           |   3 +
 llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp   |   2 +
 llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h     |  10 ++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |  37 ++++++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h      |  18 +++
 .../CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll   |  84 +++++++++++++
 .../attr-amdgpu-num-workgroups_error_check.ll      |  71 +++++++++++
 21 files changed, 628 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups_error_check.ll

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 2842b63197ff..e14c92eae0af 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -194,6 +194,12 @@ Removed Compiler Flags
 
 Attribute Changes in Clang
 --------------------------
+- Introduced a new function attribute ``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
+  ``[[clang::amdgpu_max_num_work_groups(x, y, z)]]`` for the AMDGPU target. This attribute can be
+  attached to HIP or OpenCL kernel function definitions to provide an optimization hint. The parameters
+  ``x``, ``y``, and ``z`` specify the maximum number of workgroups for the respective dimensions,
+  and each must be a positive integer when provided. The parameter ``x`` is required, while ``y`` and
+  ``z`` are optional with default value of 1.
 
 Improvements to Clang's diagnostics
 -----------------------------------
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 080340669b60..63efd85dcd4e 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2054,6 +2054,13 @@ def AMDGPUNumVGPR : InheritableAttr {
   let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
 }
 
+def AMDGPUMaxNumWorkGroups : InheritableAttr {
+  let Spellings = [Clang<"amdgpu_max_num_work_groups", 0>];
+  let Args = [ExprArgument<"MaxNumWorkGroupsX">, ExprArgument<"MaxNumWorkGroupsY", 1>, ExprArgument<"MaxNumWorkGroupsZ", 1>];
+  let Documentation = [AMDGPUMaxNumWorkGroupsDocs];
+  let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
+}
+
 def AMDGPUKernelCall : DeclOrTypeAttr {
   let Spellings = [Clang<"amdgpu_kernel">];
   let Documentation = [Undocumented];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 2c07cd09b0d5..d61f96ade557 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -2741,6 +2741,33 @@ An error will be given if:
   }];
 }
 
+def AMDGPUMaxNumWorkGroupsDocs : Documentation {
+  let Category = DocCatAMDGPUAttributes;
+  let Content = [{
+This attribute specifies the max number of work groups when the kernel
+is dispatched.
+
+Clang supports the
+``__attribute__((amdgpu_max_num_work_groups(<x>, <y>, <z>)))`` or
+``[[clang::amdgpu_max_num_work_groups(<x>, <y>, <z>)]]`` attribute for the
+AMDGPU target. This attribute may be attached to HIP or OpenCL kernel function
+definitions and is an optimization hint.
+
+The ``<x>`` parameter specifies the maximum number of work groups in the x dimension.
+Similarly ``<y>`` and ``<z>`` are for the y and z dimensions respectively.
+Each of the three values must be greater than 0 when provided. The ``<x>`` parameter
+is required, while ``<y>`` and ``<z>`` are optional with default value of 1.
+
+If specified, the AMDGPU target backend might be able to produce better machine
+code.
+
+An error will be given if:
+  - Specified values violate subtarget specifications;
+  - Specified values are not compatible with values provided through other
+    attributes.
+  }];
+}
+
 def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
   let Content = [{
 Clang supports several different calling conventions, depending on the target
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 267c79cc057c..b226851f0303 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -3911,6 +3911,16 @@ public:
   void addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI,
                                Expr *Min, Expr *Max);
 
+  /// Create an AMDGPUMaxNumWorkGroupsAttr attribute.
+  AMDGPUMaxNumWorkGroupsAttr *
+  CreateAMDGPUMaxNumWorkGroupsAttr(const AttributeCommonInfo &CI, Expr *XExpr,
+                                   Expr *YExpr, Expr *ZExpr);
+
+  /// addAMDGPUMaxNumWorkGroupsAttr - Adds an amdgpu_max_num_work_groups
+  /// attribute to a particular declaration.
+  void addAMDGPUMaxNumWorkGroupsAttr(Decl *D, const AttributeCommonInfo &CI,
+                                     Expr *XExpr, Expr *YExpr, Expr *ZExpr);
+
   DLLImportAttr *mergeDLLImportAttr(Decl *D, const AttributeCommonInfo &CI);
   DLLExportAttr *mergeDLLExportAttr(Decl *D, const AttributeCommonInfo &CI);
   MSInheritanceAttr *mergeMSInheritanceAttr(Decl *D,
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 03ac6b78598f..44e86c0b40f6 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -356,6 +356,29 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
     if (NumVGPR != 0)
       F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
   }
+
+  if (const auto *Attr = FD->getAttr<AMDGPUMaxNumWorkGroupsAttr>()) {
+    uint32_t X = Attr->getMaxNumWorkGroupsX()
+                     ->EvaluateKnownConstInt(M.getContext())
+                     .getExtValue();
+    // Y and Z dimensions default to 1 if not specified
+    uint32_t Y = Attr->getMaxNumWorkGroupsY()
+                     ? Attr->getMaxNumWorkGroupsY()
+                           ->EvaluateKnownConstInt(M.getContext())
+                           .getExtValue()
+                     : 1;
+    uint32_t Z = Attr->getMaxNumWorkGroupsZ()
+                     ? Attr->getMaxNumWorkGroupsZ()
+                           ->EvaluateKnownConstInt(M.getContext())
+                           .getExtValue()
+                     : 1;
+
+    llvm::SmallString<32> AttrVal;
+    llvm::raw_svector_ostream OS(AttrVal);
+    OS << X << ',' << Y << ',' << Z;
+
+    F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());
+  }
 }
 
 /// Emits control constants used to change per-architecture behaviour in the
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index c00120b59d39..e3da3e606435 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -8079,6 +8079,65 @@ static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   D->addAttr(::new (S.Context) AMDGPUNumVGPRAttr(S.Context, AL, NumVGPR));
 }
 
+static bool
+checkAMDGPUMaxNumWorkGroupsArguments(Sema &S, Expr *XExpr, Expr *YExpr,
+                                     Expr *ZExpr,
+                                     const AMDGPUMaxNumWorkGroupsAttr &Attr) {
+  if (S.DiagnoseUnexpandedParameterPack(XExpr) ||
+      (YExpr && S.DiagnoseUnexpandedParameterPack(YExpr)) ||
+      (ZExpr && S.DiagnoseUnexpandedParameterPack(ZExpr)))
+    return true;
+
+  // Accept template arguments for now as they depend on something else.
+  // We'll get to check them when they eventually get instantiated.
+  if (XExpr->isValueDependent() || (YExpr && YExpr->isValueDependent()) ||
+      (ZExpr && ZExpr->isValueDependent()))
+    return false;
+
+  uint32_t NumWG = 0;
+  Expr *Exprs[3] = {XExpr, YExpr, ZExpr};
+  for (int i = 0; i < 3; i++) {
+    if (Exprs[i]) {
+      if (!checkUInt32Argument(S, Attr, Exprs[i], NumWG, i,
+                               /*StrictlyUnsigned=*/true))
+        return true;
+      if (NumWG == 0) {
+        S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero)
+            << &Attr << Exprs[i]->getSourceRange();
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+AMDGPUMaxNumWorkGroupsAttr *
+Sema::CreateAMDGPUMaxNumWorkGroupsAttr(const AttributeCommonInfo &CI,
+                                       Expr *XExpr, Expr *YExpr, Expr *ZExpr) {
+  AMDGPUMaxNumWorkGroupsAttr TmpAttr(Context, CI, XExpr, YExpr, ZExpr);
+
+  if (checkAMDGPUMaxNumWorkGroupsArguments(*this, XExpr, YExpr, ZExpr, TmpAttr))
+    return nullptr;
+
+  return ::new (Context)
+      AMDGPUMaxNumWorkGroupsAttr(Context, CI, XExpr, YExpr, ZExpr);
+}
+
+void Sema::addAMDGPUMaxNumWorkGroupsAttr(Decl *D, const AttributeCommonInfo &CI,
+                                         Expr *XExpr, Expr *YExpr,
+                                         Expr *ZExpr) {
+  if (auto *Attr = CreateAMDGPUMaxNumWorkGroupsAttr(CI, XExpr, YExpr, ZExpr))
+    D->addAttr(Attr);
+}
+
+static void handleAMDGPUMaxNumWorkGroupsAttr(Sema &S, Decl *D,
+                                             const ParsedAttr &AL) {
+  Expr *YExpr = (AL.getNumArgs() > 1) ? AL.getArgAsExpr(1) : nullptr;
+  Expr *ZExpr = (AL.getNumArgs() > 2) ? AL.getArgAsExpr(2) : nullptr;
+  S.addAMDGPUMaxNumWorkGroupsAttr(D, AL, AL.getArgAsExpr(0), YExpr, ZExpr);
+}
+
 static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D,
                                               const ParsedAttr &AL) {
   // If we try to apply it to a function pointer, don't warn, but don't
@@ -9183,6 +9242,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_AMDGPUNumVGPR:
     handleAMDGPUNumVGPRAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_AMDGPUMaxNumWorkGroups:
+    handleAMDGPUMaxNumWorkGroupsAttr(S, D, AL);
+    break;
   case ParsedAttr::AT_AVRSignal:
     handleAVRSignalAttr(S, D, AL);
     break;
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 20c2c93ac9c7..8ef8bfdf2a7b 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -607,6 +607,29 @@ static void instantiateDependentAMDGPUWavesPerEUAttr(
   S.addAMDGPUWavesPerEUAttr(New, Attr, MinExpr, MaxExpr);
 }
 
+static void instantiateDependentAMDGPUMaxNumWorkGroupsAttr(
+    Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
+    const AMDGPUMaxNumWorkGroupsAttr &Attr, Decl *New) {
+  EnterExpressionEvaluationContext Unevaluated(
+      S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
+
+  ExprResult ResultX = S.SubstExpr(Attr.getMaxNumWorkGroupsX(), TemplateArgs);
+  if (!ResultX.isUsable())
+    return;
+  ExprResult ResultY = S.SubstExpr(Attr.getMaxNumWorkGroupsY(), TemplateArgs);
+  if (!ResultY.isUsable())
+    return;
+  ExprResult ResultZ = S.SubstExpr(Attr.getMaxNumWorkGroupsZ(), TemplateArgs);
+  if (!ResultZ.isUsable())
+    return;
+
+  Expr *XExpr = ResultX.getAs<Expr>();
+  Expr *YExpr = ResultY.getAs<Expr>();
+  Expr *ZExpr = ResultZ.getAs<Expr>();
+
+  S.addAMDGPUMaxNumWorkGroupsAttr(New, Attr, XExpr, YExpr, ZExpr);
+}
+
 // This doesn't take any template parameters, but we have a custom action that
 // needs to happen when the kernel itself is instantiated. We need to run the
 // ItaniumMangler to mark the names required to name this kernel.
@@ -792,6 +815,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
                                                *AMDGPUFlatWorkGroupSize, New);
     }
 
+    if (const auto *AMDGPUMaxNumWorkGroups =
+            dyn_cast<AMDGPUMaxNumWorkGroupsAttr>(TmplAttr)) {
+      instantiateDependentAMDGPUMaxNumWorkGroupsAttr(
+          *this, TemplateArgs, *AMDGPUMaxNumWorkGroups, New);
+    }
+
     if (const auto *ParamAttr = dyn_cast<HLSLParamModifierAttr>(TmplAttr)) {
       instantiateDependentHLSLParamModifierAttr(*this, TemplateArgs, ParamAttr,
                                                 New);
diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
index a1642421af2c..11a133fd1351 100644
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
@@ -40,12 +40,45 @@ __attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
 __global__ void num_vgpr_64() {
 // CHECK: define{{.*}} amdgpu_kernel void @_Z11num_vgpr_64v() [[NUM_VGPR_64:#[0-9]+]]
 }
+__attribute__((amdgpu_max_num_work_groups(32, 4, 2))) // expected-no-diagnostics
+__global__ void max_num_work_groups_32_4_2() {
+// CHECK: define{{.*}} amdgpu_kernel void @_Z26max_num_work_groups_32_4_2v() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]]
+}
+__attribute__((amdgpu_max_num_work_groups(32))) // expected-no-diagnostics
+__global__ void max_num_work_groups_32() {
+// CHECK: define{{.*}} amdgpu_kernel void @_Z22max_num_work_groups_32v() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]]
+}
+__attribute__((amdgpu_max_num_work_groups(32,1))) // expected-no-diagnostics
+__global__ void max_num_work_groups_32_1() {
+// CHECK: define{{.*}} amdgpu_kernel void @_Z24max_num_work_groups_32_1v() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]]
+}
+
+
+
+template<unsigned a>
+__attribute__((amdgpu_max_num_work_groups(a, 4, 2)))
+__global__ void template_a_4_2_max_num_work_groups() {}
+template __global__ void template_a_4_2_max_num_work_groups<32>();
+// CHECK: define{{.*}} amdgpu_kernel void @_Z34template_a_4_2_max_num_work_groupsILj32EEvv() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]]
+
+template<unsigned a>
+__attribute__((amdgpu_max_num_work_groups(32, a, 2)))
+__global__ void template_32_a_2_max_num_work_groups() {}
+template __global__ void template_32_a_2_max_num_work_groups<4>();
+// CHECK: define{{.*}} amdgpu_kernel void @_Z35template_32_a_2_max_num_work_groupsILj4EEvv() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]]
+
+template<unsigned a>
+__attribute__((amdgpu_max_num_work_groups(32, 4, a)))
+__global__ void template_32_4_a_max_num_work_groups() {}
+template __global__ void template_32_4_a_max_num_work_groups<2>();
+// CHECK: define{{.*}} amdgpu_kernel void @_Z35template_32_4_a_max_num_work_groupsILj2EEvv() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]]
 
 // Make sure this is silently accepted on other targets.
 // NAMD-NOT: "amdgpu-flat-work-group-size"
 // NAMD-NOT: "amdgpu-waves-per-eu"
 // NAMD-NOT: "amdgpu-num-vgpr"
 // NAMD-NOT: "amdgpu-num-sgpr"
+// NAMD-NOT: "amdgpu-max-num-work-groups"
 
 // DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true"
 // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
@@ -53,5 +86,7 @@ __global__ void num_vgpr_64() {
 // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
 // CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32"
 // CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}}"amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_4_2]] = {{.*}}"amdgpu-max-num-workgroups"="32,4,2"
+// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_1_1]] = {{.*}}"amdgpu-max-num-workgroups"="32,1,1"
 
 // NOUB-NOT: "uniform-work-group-size"="true"
diff --git a/clang/test/CodeGenOpenCL/amdgpu-attrs.cl b/clang/test/CodeGenOpenCL/amdgpu-attrs.cl
index b0dfc97b53b2..5648bc13458e 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-attrs.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-attrs.cl
@@ -139,6 +139,46 @@ kernel void reqd_work_group_size_32_2_1_flat_work_group_size_16_128() {
 // CHECK: define{{.*}} amdgpu_kernel void @reqd_work_group_size_32_2_1_flat_work_group_size_16_128() [[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]]
 }
 
+__attribute__((amdgpu_max_num_work_groups(1, 1, 1))) // expected-no-diagnostics
+kernel void max_num_work_groups_1_1_1() {
+// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_1_1_1() [[MAX_NUM_WORK_GROUPS_1_1_1:#[0-9]+]]
+}
+
+__attribute__((amdgpu_max_num_work_groups(32, 1, 1))) // expected-no-diagnostics
+kernel void max_num_work_groups_32_1_1() {
+// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_32_1_1() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]]
+}
+
+__attribute__((amdgpu_max_num_work_groups(32, 8, 1))) // expected-no-diagnostics
+kernel void max_num_work_groups_32_8_1() {
+// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_32_8_1() [[MAX_NUM_WORK_GROUPS_32_8_1:#[0-9]+]]
+}
+
+__attribute__((amdgpu_max_num_work_groups(1, 1, 32))) // expected-no-diagnostics
+kernel void max_num_work_groups_1_1_32() {
+// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_1_1_32() [[MAX_NUM_WORK_GROUPS_1_1_32:#[0-9]+]]
+}
+
+__attribute__((amdgpu_max_num_work_groups(1, 8, 32))) // expected-no-diagnostics
+kernel void max_num_work_groups_1_8_32() {
+// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_1_8_32() [[MAX_NUM_WORK_GROUPS_1_8_32:#[0-9]+]]
+}
+
+__attribute__((amdgpu_max_num_work_groups(4, 8, 32))) // expected-no-diagnostics
+kernel void max_num_work_groups_4_8_32() {
+// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_4_8_32() [[MAX_NUM_WORK_GROUPS_4_8_32:#[0-9]+]]
+}
+
+__attribute__((amdgpu_max_num_work_groups(32))) // expected-no-diagnostics
+kernel void max_num_work_groups_32() {
+// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_32() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]]
+}
+
+__attribute__((amdgpu_max_num_work_groups(32,1))) // expected-no-diagnostics
+kernel void max_num_work_groups_32_1() {
+// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_32_1() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]]
+}
+
 void a_function() {
 // CHECK: define{{.*}} void @a_function() [[A_FUNCTION:#[0-9]+]]
 }
@@ -189,5 +229,12 @@ kernel void default_kernel() {
 // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
 // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
 
+// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_1_1_1]] = {{.*}} "amdgpu-max-num-workgroups"="1,1,1"
+// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_1_1]] = {{.*}} "amdgpu-max-num-workgroups"="32,1,1"
+// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_8_1]] = {{.*}} "amdgpu-max-num-workgroups"="32,8,1"
+// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_1_1_32]] = {{.*}} "amdgpu-max-num-workgroups"="1,1,32"
+// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_1_8_32]] = {{.*}} "amdgpu-max-num-workgroups"="1,8,32"
+// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_4_8_32]] = {{.*}} "amdgpu-max-num-workgroups"="4,8,32"
+
 // CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}}
 // CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} "amdgpu-flat-work-group-size"="1,256"
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index ec84ebdc6abe..318bfb2df2a7 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -4,6 +4,7 @@
 
 // CHECK: #pragma clang attribute supports the following attributes:
 // CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function)
+// CHECK-NEXT: AMDGPUMaxNumWorkGroups (SubjectMatchRule_function)
 // CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function)
 // CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function)
 // CHECK-NEXT: AMDGPUWavesPerEU (SubjectMatchRule_function)
diff --git a/clang/test/SemaCUDA/amdgpu-attrs.cu b/clang/test/SemaCUDA/amdgpu-attrs.cu
index 4811ef796c66..e04b32d121bc 100644
--- a/clang/test/SemaCUDA/amdgpu-attrs.cu
+++ b/clang/test/SemaCUDA/amdgpu-attrs.cu
@@ -63,6 +63,16 @@ __global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_6
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {}
 
+__attribute__((amdgpu_max_num_work_groups(32, 1, 1)))
+__global__ void max_num_work_groups_32_1_1() {}
+
+__attribute__((amdgpu_max_num_work_groups(32, 1, 1), amdgpu_flat_work_group_size(32, 64)))
+__global__ void max_num_work_groups_32_1_1_flat_work_group_size_32_64() {}
+
+__attribute__((amdgpu_max_num_work_groups(32, 1, 1), amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
+__global__ void max_num_work_groups_32_1_1_flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {}
+
+
 // expected-error@+2{{attribute 'reqd_work_group_size' can only be applied to an OpenCL kernel function}}
 __attribute__((reqd_work_group_size(32, 64, 64)))
 __global__ void reqd_work_group_size_32_64_64() {}
@@ -194,3 +204,125 @@ __global__ void non_cexpr_waves_per_eu_2() {}
 // expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}}
 __attribute__((amdgpu_waves_per_eu(2, ipow2(2))))
 __global__ void non_cexpr_waves_per_eu_2_4() {}
+
+__attribute__((amdgpu_max_num_work_groups(32)))
+__global__ void max_num_work_groups_32() {}
+
+__attribute__((amdgpu_max_num_work_groups(32, 1)))
+__global__ void max_num_work_groups_32_1() {}
+
+// expected-error@+1{{'amdgpu_max_num_work_groups' attribute takes no more than 3 arguments}}
+__attribute__((amdgpu_max_num_work_groups(32, 1, 1, 1)))
+__global__ void max_num_work_groups_32_1_1_1() {}
+
+// expected-error@+1{{'amdgpu_max_num_work_groups' attribute takes at least 1 argument}}
+__attribute__((amdgpu_max_num_work_groups()))
+__global__ void max_num_work_groups_no_arg() {}
+
+// expected-error@+1{{expected expression}}
+__attribute__((amdgpu_max_num_work_groups(,1,1)))
+__global__ void max_num_work_groups_empty_1_1() {}
+
+// expected-error@+1{{expected expression}}
+__attribute__((amdgpu_max_num_work_groups(32,,1)))
+__global__ void max_num_work_groups_32_empty_1() {}
+
+// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_max_num_work_groups(ipow2(5), 1, 1)))
+__global__ void max_num_work_groups_32_1_1_non_int_arg0() {}
+
+// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_max_num_work_groups(32, "1", 1)))
+__global__ void max_num_work_groups_32_1_1_non_int_arg1() {}
+
+// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires a non-negative integral compile time constant expression}}
+__attribute__((amdgpu_max_num_work_groups(-32, 1, 1)))
+__global__ void max_num_work_groups_32_1_1_neg_int_arg0() {}
+
+// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires a non-negative integral compile time constant expression}}
+__attribute__((amdgpu_max_num_work_groups(32, -1, 1)))
+__global__ void max_num_work_groups_32_1_1_neg_int_arg1() {}
+
+// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires a non-negative integral compile time constant expression}}
+__attribute__((amdgpu_max_num_work_groups(32, 1, -1)))
+__global__ void max_num_work_groups_32_1_1_neg_int_arg2() {}
+
+// expected-error@+1{{'amdgpu_max_num_work_groups' attribute must be greater than 0}}
+__attribute__((amdgpu_max_num_work_groups(0, 1, 1)))
+__global__ void max_num_work_groups_0_1_1() {}
+
+// expected-error@+1{{'amdgpu_max_num_work_groups' attribute must be greater than 0}}
+__attribute__((amdgpu_max_num_work_groups(32, 0, 1)))
+__global__ void max_num_work_groups_32_0_1() {}
+
+// expected-error@+1{{'amdgpu_max_num_work_groups' attribute must be greater than 0}}
+__attribute__((amdgpu_max_num_work_groups(32, 1, 0)))
+__global__ void max_num_work_groups_32_1_0() {}
+
+__attribute__((amdgpu_max_num_work_groups(4294967295)))
+__global__ void max_num_work_groups_max_unsigned_int() {}
+
+// expected-error@+1{{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_max_num_work_groups(4294967296)))
+__global__ void max_num_work_groups_max_unsigned_int_plus1() {}
+
+// expected-error@+1{{integer constant expression evaluates to value 10000000000 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_max_num_work_groups(10000000000)))
+__global__ void max_num_work_groups_too_large() {}
+
+int num_wg_x = 32;
+int num_wg_y = 1;
+int num_wg_z = 1;
+// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_max_num_work_groups(num_wg_x, 1, 1)))
+__global__ void max_num_work_groups_32_1_1_non_const_arg0() {}
+
+// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_max_num_work_groups(32, num_wg_y, 1)))
+__global__ void max_num_work_groups_32_1_1_non_const_arg1() {}
+
+// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires parameter 2 to be an integer constant}}
+__attribute__((amdgpu_max_num_work_groups(32, 1, num_wg_z)))
+__global__ void max_num_work_groups_32_1_1_non_const_arg2() {}
+
+const int c_num_wg_x = 32;
+__attribute__((amdgpu_max_num_work_groups(c_num_wg_x, 1, 1)))
+__global__ void max_num_work_groups_32_1_1_const_arg0() {}
+
+template<unsigned a>
+__attribute__((amdgpu_max_num_work_groups(a, 1, 1)))
+__global__ void template_a_1_1_max_num_work_groups() {}
+template __global__ void template_a_1_1_max_num_work_groups<32>();
+
+template<unsigned a>
+__attribute__((amdgpu_max_num_work_groups(32, a, 1)))
+__global__ void template_32_a_1_max_num_work_groups() {}
+template __global__ void template_32_a_1_max_num_work_groups<1>();
+
+template<unsigned a>
+__attribute__((amdgpu_max_num_work_groups(32, 1, a)))
+__global__ void template_32_1_a_max_num_work_groups() {}
+template __global__ void template_32_1_a_max_num_work_groups<1>();
+
+// expected-error@+3{{'amdgpu_max_num_work_groups' attribute must be greater than 0}}
+// expected-note@+4{{in instantiation of}}
+template<unsigned b>
+__attribute__((amdgpu_max_num_work_groups(b, 1, 1)))
+__global__ void template_b_1_1_max_num_work_groups() {}
+template __global__ void template_b_1_1_max_num_work_groups<0>();
+
+// expected-error@+3{{'amdgpu_max_num_work_groups' attribute must be greater than 0}}
+// expected-note@+4{{in instantiation of}}
+template<unsigned b>
+__attribute__((amdgpu_max_num_work_groups(32, b, 1)))
+__global__ void template_32_b_1_max_num_work_groups() {}
+template __global__ void template_32_b_1_max_num_work_groups<0>();
+
+// expected-error@+3{{'amdgpu_max_num_work_groups' attribute must be greater than 0}}
+// expected-note@+4{{in instantiation of}}
+template<unsigned b>
+__attribute__((amdgpu_max_num_work_groups(32, 1, b)))
+__global__ void template_32_1_b_max_num_work_groups() {}
+template __global__ void template_32_1_b_max_num_work_groups<0>();
+
+
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index f5f37d9e8a3b..99d7a482710f 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1442,6 +1442,11 @@ The AMDGPU backend supports the following LLVM IR attributes.
                                              the frame. This is an internal detail of how LDS variables are lowered,
                                              language front ends should not set this attribute.
 
+     "amdgpu-max-num-workgroups"="x,y,z"     Specify the maximum number of work groups for the kernel dispatch in the
+                                             X, Y, and Z dimensions. Generated by the ``amdgpu_max_num_work_groups``
+                                             CLANG attribute [CLANG-ATTR]_. Clang only emits this attribute when all
+                                             the three numbers are >= 1.
+
      ======================================= ==========================================================
 
 Calling Conventions
@@ -3917,6 +3922,11 @@ same *vendor-name*.
 
                                                                   If omitted, "normal" is
                                                                   assumed.
+     ".max_num_work_groups_{x,y,z}"      integer                  The max number of
+                                                                  launched work-groups
+                                                                  in the X, Y, and Z
+                                                                  dimensions. Each number
+                                                                  must be >=1.
      =================================== ============== ========= ================================
 
 ..
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index c20fdd51607a..9e288ab50e17 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -494,6 +494,14 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
 
   Kern[".max_flat_workgroup_size"] =
       Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
+  unsigned NumWGX = MFI.getMaxNumWorkGroupsX();
+  unsigned NumWGY = MFI.getMaxNumWorkGroupsY();
+  unsigned NumWGZ = MFI.getMaxNumWorkGroupsZ();
+  if (NumWGX != 0 && NumWGY != 0 && NumWGZ != 0) {
+    Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX);
+    Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY);
+    Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ);
+  }
   Kern[".sgpr_spill_count"] =
       Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
   Kern[".vgpr_spill_count"] =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index bcc7dedf3229..fa77b94fc22d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -432,7 +432,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU(
   std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
 
   // If minimum/maximum flat work group sizes were explicitly requested using
-  // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
+  // "amdgpu-flat-workgroup-size" attribute, then set default minimum/maximum
   // number of waves per execution unit to values implied by requested
   // minimum/maximum flat work group sizes.
   unsigned MinImpliedByFlatWorkGroupSize =
@@ -1108,3 +1108,8 @@ void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {
 unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() {
   return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs;
 }
+
+SmallVector<unsigned>
+AMDGPUSubtarget::getMaxNumWorkGroups(const Function &F) const {
+  return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3);
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index b72697973be7..e2d8b5d1ce97 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -288,6 +288,9 @@ public:
   /// 2) dimension.
   unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
 
+  /// Return the number of work groups for the function.
+  SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) const;
+
   /// Return true if only a single workitem can be active in a wave.
   bool isSingleLaneExecution(const Function &Kernel) const;
 
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 52d6fe6c7ba5..2569f40fec0e 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -46,6 +46,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
   const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
   WavesPerEU = ST.getWavesPerEU(F);
+  MaxNumWorkGroups = ST.getMaxNumWorkGroups(F);
+  assert(MaxNumWorkGroups.size() == 3);
 
   Occupancy = ST.computeOccupancy(F, getLDSSize());
   CallingConv::ID CC = F.getCallingConv();
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 0336ec4985ea..7d0c1ba8448e 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -426,6 +426,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
 
   const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;
 
+  // Default/requested number of work groups for the function.
+  SmallVector<unsigned> MaxNumWorkGroups = {0, 0, 0};
+
 private:
   unsigned NumUserSGPRs = 0;
   unsigned NumSystemSGPRs = 0;
@@ -1072,6 +1075,13 @@ public:
 
   // \returns true if a function needs or may need AGPRs.
   bool usesAGPRs(const MachineFunction &MF) const;
+
+  /// \returns Default/requested number of work groups for this function.
+  SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }
+
+  unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups[0]; }
+  unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups[1]; }
+  unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index edb0e50da289..aa47dccf2dd2 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -11,6 +11,7 @@
 #include "AMDGPUAsmUtils.h"
 #include "AMDKernelCodeT.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Constants.h"
@@ -1298,6 +1299,42 @@ getIntegerPairAttribute(const Function &F, StringRef Name,
   return Ints;
 }
 
+SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
+                                             unsigned Size) {
+  assert(Size > 2);
+  SmallVector<unsigned> Default(Size, 0);
+
+  Attribute A = F.getFnAttribute(Name);
+  if (!A.isStringAttribute())
+    return Default;
+
+  SmallVector<unsigned> Vals(Size, 0);
+
+  LLVMContext &Ctx = F.getContext();
+
+  StringRef S = A.getValueAsString();
+  unsigned i = 0;
+  for (; !S.empty() && i < Size; i++) {
+    std::pair<StringRef, StringRef> Strs = S.split(',');
+    unsigned IntVal;
+    if (Strs.first.trim().getAsInteger(0, IntVal)) {
+      Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
+                    Name);
+      return Default;
+    }
+    Vals[i] = IntVal;
+    S = Strs.second;
+  }
+
+  if (!S.empty() || i < Size) {
+    Ctx.emitError("attribute " + Name +
+                  " has incorrect number of integers; expected " +
+                  llvm::utostr(Size));
+    return Default;
+  }
+  return Vals;
+}
+
 unsigned getVmcntBitMask(const IsaVersion &Version) {
   return (1 << (getVmcntBitWidthLo(Version.Major) +
                 getVmcntBitWidthHi(Version.Major))) -
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index d7ea2a3eff4b..f8521cba077c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -863,6 +863,14 @@ bool isReadOnlySegment(const GlobalValue *GV);
 /// target triple \p TT, false otherwise.
 bool shouldEmitConstantsToTextSection(const Triple &TT);
 
+/// \returns Integer value requested using \p F's \p Name attribute.
+///
+/// \returns \p Default if attribute is not present.
+///
+/// \returns \p Default and emits error if requested value cannot be converted
+/// to integer.
+int getIntegerAttribute(const Function &F, StringRef Name, int Default);
+
 /// \returns A pair of integer values requested using \p F's \p Name attribute
 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
 /// is false).
@@ -877,6 +885,16 @@ getIntegerPairAttribute(const Function &F, StringRef Name,
                         std::pair<unsigned, unsigned> Default,
                         bool OnlyFirstRequired = false);
 
+/// \returns Generate a vector of integer values requested using \p F's \p Name
+/// attribute.
+///
+/// \returns true if exactly Size (>2) number of integers are found in the
+/// attribute.
+///
+/// \returns false if any error occurs.
+SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
+                                             unsigned Size);
+
 /// Represents the counter values to wait for in an s_waitcnt instruction.
 ///
 /// Large values (including the maximum possible integer) can be used to
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll
new file mode 100644
index 000000000000..bc58222076ac
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; Attribute not specified.
+; CHECK-LABEL: {{^}}empty_no_attribute:
+define amdgpu_kernel void @empty_no_attribute() {
+entry:
+  ret void
+}
+
+; Ignore if number of work groups for x dimension is 0.
+; CHECK-LABEL: {{^}}empty_max_num_workgroups_x0:
+define amdgpu_kernel void @empty_max_num_workgroups_x0() #0 {
+entry:
+  ret void
+}
+attributes #0 = {"amdgpu-max-num-workgroups"="0,2,3"}
+
+; Ignore if number of work groups for y dimension is 0.
+; CHECK-LABEL: {{^}}empty_max_num_workgroups_y0:
+define amdgpu_kernel void @empty_max_num_workgroups_y0() #1 {
+entry:
+  ret void
+}
+attributes #1 = {"amdgpu-max-num-workgroups"="1,0,3"}
+
+; Ignore if number of work groups for z dimension is 0.
+; CHECK-LABEL: {{^}}empty_max_num_workgroups_z0:
+define amdgpu_kernel void @empty_max_num_workgroups_z0() #2 {
+entry:
+  ret void
+}
+attributes #2 = {"amdgpu-max-num-workgroups"="1,2,0"}
+
+; CHECK-LABEL: {{^}}empty_max_num_workgroups_1_2_3:
+define amdgpu_kernel void @empty_max_num_workgroups_1_2_3() #3 {
+entry:
+  ret void
+}
+attributes #3 = {"amdgpu-max-num-workgroups"="1,2,3"}
+
+; CHECK-LABEL: {{^}}empty_max_num_workgroups_1024_1024_1024:
+define amdgpu_kernel void @empty_max_num_workgroups_1024_1024_1024() #4 {
+entry:
+  ret void
+}
+attributes #4 = {"amdgpu-max-num-workgroups"="1024,1024,1024"}
+
+
+; CHECK: .amdgpu_metadata
+; CHECK: - .args:
+; CHECK:        .max_flat_workgroup_size: 1024
+; CHECK-NEXT:   .name:           empty_no_attribute
+; CHECK-NEXT:   .private_segment_fixed_size: 0
+
+; CHECK: - .args:
+; CHECK:        .max_flat_workgroup_size: 1024
+; CHECK-NEXT:   .name:           empty_max_num_workgroups_x0
+; CHECK-NEXT:   .private_segment_fixed_size: 0
+
+; CHECK: - .args:
+; CHECK:        .max_flat_workgroup_size: 1024
+; CHECK-NEXT:   .name:           empty_max_num_workgroups_y0
+; CHECK-NEXT:   .private_segment_fixed_size: 0
+
+; CHECK: - .args:
+; CHECK:        .max_flat_workgroup_size: 1024
+; CHECK-NEXT:   .name:           empty_max_num_workgroups_z0
+; CHECK-NEXT:   .private_segment_fixed_size: 0
+
+; CHECK: - .args:
+; CHECK:        .max_flat_workgroup_size: 1024
+; CHECK-NEXT:   .max_num_workgroups_x: 1
+; CHECK-NEXT:   .max_num_workgroups_y: 2
+; CHECK-NEXT:   .max_num_workgroups_z: 3
+; CHECK-NEXT:   .name:           empty_max_num_workgroups_1_2_3
+; CHECK-NEXT:   .private_segment_fixed_size: 0
+
+; CHECK: - .args:
+; CHECK:        .max_flat_workgroup_size: 1024
+; CHECK-NEXT:   .max_num_workgroups_x: 1024
+; CHECK-NEXT:   .max_num_workgroups_y: 1024
+; CHECK-NEXT:   .max_num_workgroups_z: 1024
+; CHECK-NEXT:   .name:           empty_max_num_workgroups_1024_1024_1024
+; CHECK-NEXT:   .private_segment_fixed_size: 0
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups_error_check.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups_error_check.ll
new file mode 100644
index 000000000000..6d86d2d7c1a3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups_error_check.ll
@@ -0,0 +1,71 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2>&1 | FileCheck --check-prefix=ERROR %s
+
+; ERROR: error: can't parse integer attribute -1 in amdgpu-max-num-workgroups
+define amdgpu_kernel void @empty_max_num_workgroups_neg_num1() #21 {
+entry:
+  ret void
+}
+attributes #21 = {"amdgpu-max-num-workgroups"="-1,2,3"}
+
+; ERROR: error: can't parse integer attribute -2 in amdgpu-max-num-workgroups
+define amdgpu_kernel void @empty_max_num_workgroups_neg_num2() #22 {
+entry:
+  ret void
+}
+attributes #22 = {"amdgpu-max-num-workgroups"="1,-2,3"}
+
+; ERROR: error: can't parse integer attribute -3 in amdgpu-max-num-workgroups
+define amdgpu_kernel void @empty_max_num_workgroups_neg_num3() #23 {
+entry:
+  ret void
+}
+attributes #23 = {"amdgpu-max-num-workgroups"="1,2,-3"}
+
+; ERROR: error: can't parse integer attribute 1.0 in amdgpu-max-num-workgroups
+define amdgpu_kernel void @empty_max_num_workgroups_non_int1() #31 {
+entry:
+  ret void
+}
+attributes #31 = {"amdgpu-max-num-workgroups"="1.0,2,3"}
+
+; ERROR: error: can't parse integer attribute 2.0 in amdgpu-max-num-workgroups
+define amdgpu_kernel void @empty_max_num_workgroups_non_int2() #32 {
+entry:
+  ret void
+}
+attributes #32 = {"amdgpu-max-num-workgroups"="1,2.0,3"}
+
+; ERROR: error: can't parse integer attribute 3.0 in amdgpu-max-num-workgroups
+define amdgpu_kernel void @empty_max_num_workgroups_non_int3() #33 {
+entry:
+  ret void
+}
+attributes #33 = {"amdgpu-max-num-workgroups"="1,2,3.0"}
+
+; ERROR: error: can't parse integer attribute 10000000000 in amdgpu-max-num-workgroups
+define amdgpu_kernel void @empty_max_num_workgroups_too_large() #41 {
+entry:
+  ret void
+}
+attributes #41 = {"amdgpu-max-num-workgroups"="10000000000,2,3"}
+
+; ERROR: error: attribute amdgpu-max-num-workgroups has incorrect number of integers; expected 3
+define amdgpu_kernel void @empty_max_num_workgroups_1_arg() #51 {
+entry:
+  ret void
+}
+attributes #51 = {"amdgpu-max-num-workgroups"="1"}
+
+; ERROR: error: attribute amdgpu-max-num-workgroups has incorrect number of integers; expected 3
+define amdgpu_kernel void @empty_max_num_workgroups_2_args() #52 {
+entry:
+  ret void
+}
+attributes #52 = {"amdgpu-max-num-workgroups"="1,2"}
+
+; ERROR: error: attribute amdgpu-max-num-workgroups has incorrect number of integers; expected 3
+define amdgpu_kernel void @empty_max_num_workgroups_4_args() #53 {
+entry:
+  ret void
+}
+attributes #53 = {"amdgpu-max-num-workgroups"="1,2,3,4"}
-- 
cgit v1.2.3


From c1af6ab505a83bfb4fc8752591ad333190bc9389 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 12 Mar 2024 17:29:30 +0000
Subject: [X86] getFauxShuffleMask - recognise CONCAT(SUB0, SUB1) style
 patterns

Handles the INSERT_SUBVECTOR(INSERT_SUBVECTOR(UNDEF,SUB0,0),SUB1,N) pattern

Currently limited to v8i64/v8f64 cases as only AVX512 has decent cross lane 2-input shuffles, the plan is to relax this as I deal with some regressions
---
 llvm/lib/Target/X86/X86ISelLowering.cpp            |  17 ++
 .../X86/vector-interleaved-store-i16-stride-7.ll   |  10 +-
 .../X86/vector-interleaved-store-i16-stride-8.ll   |  28 +--
 .../X86/vector-interleaved-store-i32-stride-7.ll   |  20 +-
 .../X86/vector-interleaved-store-i32-stride-8.ll   | 280 ++++++++++-----------
 .../X86/vector-interleaved-store-i64-stride-4.ll   | 120 +++++----
 6 files changed, 233 insertions(+), 242 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 72b45d462dfe..2b5e3c0379a1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5858,6 +5858,23 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
       Ops.push_back(SubBCSrc);
       return true;
     }
+    // Handle CONCAT(SUB0, SUB1).
+    // Limit this to vXi64 512-bit vector cases to make the most of AVX512
+    // cross lane shuffles.
+    if (Depth > 0 && InsertIdx == NumSubElts && NumElts == (2 * NumSubElts) &&
+        NumBitsPerElt == 64 && NumSizeInBits == 512 &&
+        Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+        Src.getOperand(0).isUndef() &&
+        Src.getOperand(1).getValueType() == SubVT &&
+        Src.getConstantOperandVal(2) == 0) {
+      for (int i = 0; i != (int)NumSubElts; ++i)
+        Mask.push_back(i);
+      for (int i = 0; i != (int)NumSubElts; ++i)
+        Mask.push_back(i + NumElts);
+      Ops.push_back(Src.getOperand(1));
+      Ops.push_back(Sub);
+      return true;
+    }
     // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)).
     SmallVector<int, 64> SubMask;
     SmallVector<SDValue, 2> SubInputs;
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
index 79cc8e49f1fd..9e70aef86885 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
@@ -821,9 +821,8 @@ define void @store_i16_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-FCP-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512BW-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm1 = [0,2,4,0]
 ; AVX512BW-FCP-NEXT:    vpermi2q %ymm3, %ymm0, %ymm1
-; AVX512BW-FCP-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm0
-; AVX512BW-FCP-NEXT:    vpmovsxbw {{.*#+}} zmm1 = [0,4,8,12,16,20,24,1,5,9,13,17,21,25,2,6,10,14,18,22,26,3,7,11,15,19,23,27,0,0,0,0]
-; AVX512BW-FCP-NEXT:    vpermw %zmm0, %zmm1, %zmm0
+; AVX512BW-FCP-NEXT:    vpmovsxbw {{.*#+}} zmm0 = [0,4,8,12,32,36,40,1,5,9,13,33,37,41,2,6,10,14,34,38,42,3,7,11,15,35,39,43,0,0,0,0]
+; AVX512BW-FCP-NEXT:    vpermi2w %zmm1, %zmm2, %zmm0
 ; AVX512BW-FCP-NEXT:    vextracti32x4 $2, %zmm0, 32(%rax)
 ; AVX512BW-FCP-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
 ; AVX512BW-FCP-NEXT:    vmovq %xmm1, 48(%rax)
@@ -873,9 +872,8 @@ define void @store_i16_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-FCP-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512DQ-BW-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm1 = [0,2,4,0]
 ; AVX512DQ-BW-FCP-NEXT:    vpermi2q %ymm3, %ymm0, %ymm1
-; AVX512DQ-BW-FCP-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm0
-; AVX512DQ-BW-FCP-NEXT:    vpmovsxbw {{.*#+}} zmm1 = [0,4,8,12,16,20,24,1,5,9,13,17,21,25,2,6,10,14,18,22,26,3,7,11,15,19,23,27,0,0,0,0]
-; AVX512DQ-BW-FCP-NEXT:    vpermw %zmm0, %zmm1, %zmm0
+; AVX512DQ-BW-FCP-NEXT:    vpmovsxbw {{.*#+}} zmm0 = [0,4,8,12,32,36,40,1,5,9,13,33,37,41,2,6,10,14,34,38,42,3,7,11,15,35,39,43,0,0,0,0]
+; AVX512DQ-BW-FCP-NEXT:    vpermi2w %zmm1, %zmm2, %zmm0
 ; AVX512DQ-BW-FCP-NEXT:    vextracti32x4 $2, %zmm0, 32(%rax)
 ; AVX512DQ-BW-FCP-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
 ; AVX512DQ-BW-FCP-NEXT:    vmovq %xmm1, 48(%rax)
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-8.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-8.ll
index 194b715b6594..32825f291e98 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-8.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-8.ll
@@ -762,10 +762,9 @@ define void @store_i16_stride8_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
 ; AVX512BW-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 ; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpmovsxbw {{.*#+}} zmm1 = [0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29,2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31]
-; AVX512BW-NEXT:    vpermw %zmm0, %zmm1, %zmm0
-; AVX512BW-NEXT:    vmovdqa64 %zmm0, (%rax)
+; AVX512BW-NEXT:    vpmovsxbw {{.*#+}} zmm1 = [0,4,8,12,32,36,40,44,1,5,9,13,33,37,41,45,2,6,10,14,34,38,42,46,3,7,11,15,35,39,43,47]
+; AVX512BW-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
@@ -788,10 +787,9 @@ define void @store_i16_stride8_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
 ; AVX512BW-FCP-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 ; AVX512BW-FCP-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512BW-FCP-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512BW-FCP-NEXT:    vpmovsxbw {{.*#+}} zmm1 = [0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29,2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31]
-; AVX512BW-FCP-NEXT:    vpermw %zmm0, %zmm1, %zmm0
-; AVX512BW-FCP-NEXT:    vmovdqa64 %zmm0, (%rax)
+; AVX512BW-FCP-NEXT:    vpmovsxbw {{.*#+}} zmm1 = [0,4,8,12,32,36,40,44,1,5,9,13,33,37,41,45,2,6,10,14,34,38,42,46,3,7,11,15,35,39,43,47]
+; AVX512BW-FCP-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1
+; AVX512BW-FCP-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512BW-FCP-NEXT:    vzeroupper
 ; AVX512BW-FCP-NEXT:    retq
 ;
@@ -814,10 +812,9 @@ define void @store_i16_stride8_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
 ; AVX512DQ-BW-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 ; AVX512DQ-BW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-BW-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-BW-NEXT:    vpmovsxbw {{.*#+}} zmm1 = [0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29,2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31]
-; AVX512DQ-BW-NEXT:    vpermw %zmm0, %zmm1, %zmm0
-; AVX512DQ-BW-NEXT:    vmovdqa64 %zmm0, (%rax)
+; AVX512DQ-BW-NEXT:    vpmovsxbw {{.*#+}} zmm1 = [0,4,8,12,32,36,40,44,1,5,9,13,33,37,41,45,2,6,10,14,34,38,42,46,3,7,11,15,35,39,43,47]
+; AVX512DQ-BW-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1
+; AVX512DQ-BW-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512DQ-BW-NEXT:    vzeroupper
 ; AVX512DQ-BW-NEXT:    retq
 ;
@@ -840,10 +837,9 @@ define void @store_i16_stride8_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
 ; AVX512DQ-BW-FCP-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 ; AVX512DQ-BW-FCP-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-BW-FCP-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-BW-FCP-NEXT:    vpmovsxbw {{.*#+}} zmm1 = [0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29,2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31]
-; AVX512DQ-BW-FCP-NEXT:    vpermw %zmm0, %zmm1, %zmm0
-; AVX512DQ-BW-FCP-NEXT:    vmovdqa64 %zmm0, (%rax)
+; AVX512DQ-BW-FCP-NEXT:    vpmovsxbw {{.*#+}} zmm1 = [0,4,8,12,32,36,40,44,1,5,9,13,33,37,41,45,2,6,10,14,34,38,42,46,3,7,11,15,35,39,43,47]
+; AVX512DQ-BW-FCP-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1
+; AVX512DQ-BW-FCP-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512DQ-BW-FCP-NEXT:    vzeroupper
 ; AVX512DQ-BW-FCP-NEXT:    retq
   %in.vec0 = load <4 x i16>, ptr %in.vecptr0, align 64
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll
index 837d990596a5..45a76599d3e9 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll
@@ -227,9 +227,8 @@ define void @store_i32_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-FCP-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm1 = [0,2,4,0]
 ; AVX512-FCP-NEXT:    vpermi2q %ymm3, %ymm0, %ymm1
-; AVX512-FCP-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm0
-; AVX512-FCP-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [0,2,4,6,8,10,12,1,3,5,7,9,11,13,0,0]
-; AVX512-FCP-NEXT:    vpermd %zmm0, %zmm1, %zmm0
+; AVX512-FCP-NEXT:    vpmovsxbd {{.*#+}} zmm0 = [0,2,4,6,16,18,20,1,3,5,7,17,19,21,0,0]
+; AVX512-FCP-NEXT:    vpermi2d %zmm1, %zmm2, %zmm0
 ; AVX512-FCP-NEXT:    vextracti32x4 $2, %zmm0, 32(%rax)
 ; AVX512-FCP-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
 ; AVX512-FCP-NEXT:    vmovq %xmm1, 48(%rax)
@@ -279,9 +278,8 @@ define void @store_i32_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-FCP-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512DQ-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm1 = [0,2,4,0]
 ; AVX512DQ-FCP-NEXT:    vpermi2q %ymm3, %ymm0, %ymm1
-; AVX512DQ-FCP-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm0
-; AVX512DQ-FCP-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [0,2,4,6,8,10,12,1,3,5,7,9,11,13,0,0]
-; AVX512DQ-FCP-NEXT:    vpermd %zmm0, %zmm1, %zmm0
+; AVX512DQ-FCP-NEXT:    vpmovsxbd {{.*#+}} zmm0 = [0,2,4,6,16,18,20,1,3,5,7,17,19,21,0,0]
+; AVX512DQ-FCP-NEXT:    vpermi2d %zmm1, %zmm2, %zmm0
 ; AVX512DQ-FCP-NEXT:    vextracti32x4 $2, %zmm0, 32(%rax)
 ; AVX512DQ-FCP-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
 ; AVX512DQ-FCP-NEXT:    vmovq %xmm1, 48(%rax)
@@ -331,9 +329,8 @@ define void @store_i32_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-FCP-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512BW-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm1 = [0,2,4,0]
 ; AVX512BW-FCP-NEXT:    vpermi2q %ymm3, %ymm0, %ymm1
-; AVX512BW-FCP-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm0
-; AVX512BW-FCP-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [0,2,4,6,8,10,12,1,3,5,7,9,11,13,0,0]
-; AVX512BW-FCP-NEXT:    vpermd %zmm0, %zmm1, %zmm0
+; AVX512BW-FCP-NEXT:    vpmovsxbd {{.*#+}} zmm0 = [0,2,4,6,16,18,20,1,3,5,7,17,19,21,0,0]
+; AVX512BW-FCP-NEXT:    vpermi2d %zmm1, %zmm2, %zmm0
 ; AVX512BW-FCP-NEXT:    vextracti32x4 $2, %zmm0, 32(%rax)
 ; AVX512BW-FCP-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
 ; AVX512BW-FCP-NEXT:    vmovq %xmm1, 48(%rax)
@@ -383,9 +380,8 @@ define void @store_i32_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-FCP-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512DQ-BW-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm1 = [0,2,4,0]
 ; AVX512DQ-BW-FCP-NEXT:    vpermi2q %ymm3, %ymm0, %ymm1
-; AVX512DQ-BW-FCP-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm0
-; AVX512DQ-BW-FCP-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [0,2,4,6,8,10,12,1,3,5,7,9,11,13,0,0]
-; AVX512DQ-BW-FCP-NEXT:    vpermd %zmm0, %zmm1, %zmm0
+; AVX512DQ-BW-FCP-NEXT:    vpmovsxbd {{.*#+}} zmm0 = [0,2,4,6,16,18,20,1,3,5,7,17,19,21,0,0]
+; AVX512DQ-BW-FCP-NEXT:    vpermi2d %zmm1, %zmm2, %zmm0
 ; AVX512DQ-BW-FCP-NEXT:    vextracti32x4 $2, %zmm0, 32(%rax)
 ; AVX512DQ-BW-FCP-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
 ; AVX512DQ-BW-FCP-NEXT:    vmovq %xmm1, 48(%rax)
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll
index 955927eb7691..265f6daeb200 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll
@@ -160,24 +160,23 @@ define void @store_i32_stride8_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r10
 ; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r11
-; AVX512-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX512-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX512-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512-NEXT:    vmovsd {{.*#+}} xmm4 = mem[0],zero
-; AVX512-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
-; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15]
-; AVX512-NEXT:    vpermps %zmm0, %zmm1, %zmm0
-; AVX512-NEXT:    vmovaps %zmm0, (%rax)
+; AVX512-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX512-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512-NEXT:    vmovq {{.*#+}} xmm4 = mem[0],zero
+; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; AVX512-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [0,2,4,6,16,18,20,22,1,3,5,7,17,19,21,23]
+; AVX512-NEXT:    vpermi2d %zmm2, %zmm0, %zmm1
+; AVX512-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
 ;
@@ -186,24 +185,23 @@ define void @store_i32_stride8_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-FCP-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; AVX512-FCP-NEXT:    movq {{[0-9]+}}(%rsp), %r10
 ; AVX512-FCP-NEXT:    movq {{[0-9]+}}(%rsp), %r11
-; AVX512-FCP-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512-FCP-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512-FCP-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512-FCP-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512-FCP-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512-FCP-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX512-FCP-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512-FCP-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512-FCP-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX512-FCP-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512-FCP-NEXT:    vmovsd {{.*#+}} xmm4 = mem[0],zero
-; AVX512-FCP-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
-; AVX512-FCP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX512-FCP-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512-FCP-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512-FCP-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15]
-; AVX512-FCP-NEXT:    vpermps %zmm0, %zmm1, %zmm0
-; AVX512-FCP-NEXT:    vmovaps %zmm0, (%rax)
+; AVX512-FCP-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512-FCP-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512-FCP-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512-FCP-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512-FCP-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512-FCP-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX512-FCP-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512-FCP-NEXT:    vmovq {{.*#+}} xmm4 = mem[0],zero
+; AVX512-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; AVX512-FCP-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX512-FCP-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-FCP-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [0,2,4,6,16,18,20,22,1,3,5,7,17,19,21,23]
+; AVX512-FCP-NEXT:    vpermi2d %zmm2, %zmm0, %zmm1
+; AVX512-FCP-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512-FCP-NEXT:    vzeroupper
 ; AVX512-FCP-NEXT:    retq
 ;
@@ -212,24 +210,23 @@ define void @store_i32_stride8_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; AVX512DQ-NEXT:    movq {{[0-9]+}}(%rsp), %r10
 ; AVX512DQ-NEXT:    movq {{[0-9]+}}(%rsp), %r11
-; AVX512DQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512DQ-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512DQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512DQ-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512DQ-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512DQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX512DQ-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512DQ-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512DQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX512DQ-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512DQ-NEXT:    vmovsd {{.*#+}} xmm4 = mem[0],zero
-; AVX512DQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
-; AVX512DQ-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15]
-; AVX512DQ-NEXT:    vpermps %zmm0, %zmm1, %zmm0
-; AVX512DQ-NEXT:    vmovaps %zmm0, (%rax)
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512DQ-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512DQ-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512DQ-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm4 = mem[0],zero
+; AVX512DQ-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; AVX512DQ-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [0,2,4,6,16,18,20,22,1,3,5,7,17,19,21,23]
+; AVX512DQ-NEXT:    vpermi2d %zmm2, %zmm0, %zmm1
+; AVX512DQ-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
@@ -238,24 +235,23 @@ define void @store_i32_stride8_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-FCP-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; AVX512DQ-FCP-NEXT:    movq {{[0-9]+}}(%rsp), %r10
 ; AVX512DQ-FCP-NEXT:    movq {{[0-9]+}}(%rsp), %r11
-; AVX512DQ-FCP-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512DQ-FCP-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512DQ-FCP-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512DQ-FCP-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512DQ-FCP-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512DQ-FCP-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX512DQ-FCP-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512DQ-FCP-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512DQ-FCP-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX512DQ-FCP-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512DQ-FCP-NEXT:    vmovsd {{.*#+}} xmm4 = mem[0],zero
-; AVX512DQ-FCP-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
-; AVX512DQ-FCP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX512DQ-FCP-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-FCP-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-FCP-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15]
-; AVX512DQ-FCP-NEXT:    vpermps %zmm0, %zmm1, %zmm0
-; AVX512DQ-FCP-NEXT:    vmovaps %zmm0, (%rax)
+; AVX512DQ-FCP-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQ-FCP-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512DQ-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512DQ-FCP-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512DQ-FCP-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512DQ-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512DQ-FCP-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512DQ-FCP-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512DQ-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX512DQ-FCP-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512DQ-FCP-NEXT:    vmovq {{.*#+}} xmm4 = mem[0],zero
+; AVX512DQ-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; AVX512DQ-FCP-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX512DQ-FCP-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512DQ-FCP-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [0,2,4,6,16,18,20,22,1,3,5,7,17,19,21,23]
+; AVX512DQ-FCP-NEXT:    vpermi2d %zmm2, %zmm0, %zmm1
+; AVX512DQ-FCP-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512DQ-FCP-NEXT:    vzeroupper
 ; AVX512DQ-FCP-NEXT:    retq
 ;
@@ -264,24 +260,23 @@ define void @store_i32_stride8_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; AVX512BW-NEXT:    movq {{[0-9]+}}(%rsp), %r10
 ; AVX512BW-NEXT:    movq {{[0-9]+}}(%rsp), %r11
-; AVX512BW-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512BW-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512BW-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512BW-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX512BW-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512BW-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX512BW-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512BW-NEXT:    vmovsd {{.*#+}} xmm4 = mem[0],zero
-; AVX512BW-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
-; AVX512BW-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX512BW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15]
-; AVX512BW-NEXT:    vpermps %zmm0, %zmm1, %zmm0
-; AVX512BW-NEXT:    vmovaps %zmm0, (%rax)
+; AVX512BW-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512BW-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512BW-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512BW-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512BW-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512BW-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX512BW-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512BW-NEXT:    vmovq {{.*#+}} xmm4 = mem[0],zero
+; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; AVX512BW-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [0,2,4,6,16,18,20,22,1,3,5,7,17,19,21,23]
+; AVX512BW-NEXT:    vpermi2d %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
@@ -290,24 +285,23 @@ define void @store_i32_stride8_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-FCP-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; AVX512BW-FCP-NEXT:    movq {{[0-9]+}}(%rsp), %r10
 ; AVX512BW-FCP-NEXT:    movq {{[0-9]+}}(%rsp), %r11
-; AVX512BW-FCP-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512BW-FCP-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512BW-FCP-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512BW-FCP-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512BW-FCP-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512BW-FCP-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX512BW-FCP-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512BW-FCP-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512BW-FCP-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX512BW-FCP-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512BW-FCP-NEXT:    vmovsd {{.*#+}} xmm4 = mem[0],zero
-; AVX512BW-FCP-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
-; AVX512BW-FCP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX512BW-FCP-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512BW-FCP-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512BW-FCP-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15]
-; AVX512BW-FCP-NEXT:    vpermps %zmm0, %zmm1, %zmm0
-; AVX512BW-FCP-NEXT:    vmovaps %zmm0, (%rax)
+; AVX512BW-FCP-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512BW-FCP-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512BW-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512BW-FCP-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512BW-FCP-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512BW-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512BW-FCP-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512BW-FCP-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512BW-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX512BW-FCP-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512BW-FCP-NEXT:    vmovq {{.*#+}} xmm4 = mem[0],zero
+; AVX512BW-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; AVX512BW-FCP-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX512BW-FCP-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BW-FCP-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [0,2,4,6,16,18,20,22,1,3,5,7,17,19,21,23]
+; AVX512BW-FCP-NEXT:    vpermi2d %zmm2, %zmm0, %zmm1
+; AVX512BW-FCP-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512BW-FCP-NEXT:    vzeroupper
 ; AVX512BW-FCP-NEXT:    retq
 ;
@@ -316,24 +310,23 @@ define void @store_i32_stride8_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; AVX512DQ-BW-NEXT:    movq {{[0-9]+}}(%rsp), %r10
 ; AVX512DQ-BW-NEXT:    movq {{[0-9]+}}(%rsp), %r11
-; AVX512DQ-BW-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512DQ-BW-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512DQ-BW-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512DQ-BW-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512DQ-BW-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512DQ-BW-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX512DQ-BW-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512DQ-BW-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512DQ-BW-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX512DQ-BW-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512DQ-BW-NEXT:    vmovsd {{.*#+}} xmm4 = mem[0],zero
-; AVX512DQ-BW-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
-; AVX512DQ-BW-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX512DQ-BW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-BW-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-BW-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15]
-; AVX512DQ-BW-NEXT:    vpermps %zmm0, %zmm1, %zmm0
-; AVX512DQ-BW-NEXT:    vmovaps %zmm0, (%rax)
+; AVX512DQ-BW-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQ-BW-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512DQ-BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512DQ-BW-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512DQ-BW-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512DQ-BW-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512DQ-BW-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512DQ-BW-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512DQ-BW-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX512DQ-BW-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512DQ-BW-NEXT:    vmovq {{.*#+}} xmm4 = mem[0],zero
+; AVX512DQ-BW-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; AVX512DQ-BW-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX512DQ-BW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512DQ-BW-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [0,2,4,6,16,18,20,22,1,3,5,7,17,19,21,23]
+; AVX512DQ-BW-NEXT:    vpermi2d %zmm2, %zmm0, %zmm1
+; AVX512DQ-BW-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512DQ-BW-NEXT:    vzeroupper
 ; AVX512DQ-BW-NEXT:    retq
 ;
@@ -342,24 +335,23 @@ define void @store_i32_stride8_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-FCP-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; AVX512DQ-BW-FCP-NEXT:    movq {{[0-9]+}}(%rsp), %r10
 ; AVX512DQ-BW-FCP-NEXT:    movq {{[0-9]+}}(%rsp), %r11
-; AVX512DQ-BW-FCP-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512DQ-BW-FCP-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512DQ-BW-FCP-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512DQ-BW-FCP-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512DQ-BW-FCP-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512DQ-BW-FCP-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX512DQ-BW-FCP-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX512DQ-BW-FCP-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512DQ-BW-FCP-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX512DQ-BW-FCP-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX512DQ-BW-FCP-NEXT:    vmovsd {{.*#+}} xmm4 = mem[0],zero
-; AVX512DQ-BW-FCP-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
-; AVX512DQ-BW-FCP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX512DQ-BW-FCP-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-BW-FCP-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQ-BW-FCP-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15]
-; AVX512DQ-BW-FCP-NEXT:    vpermps %zmm0, %zmm1, %zmm0
-; AVX512DQ-BW-FCP-NEXT:    vmovaps %zmm0, (%rax)
+; AVX512DQ-BW-FCP-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512DQ-BW-FCP-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512DQ-BW-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512DQ-BW-FCP-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512DQ-BW-FCP-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512DQ-BW-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512DQ-BW-FCP-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512DQ-BW-FCP-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512DQ-BW-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX512DQ-BW-FCP-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512DQ-BW-FCP-NEXT:    vmovq {{.*#+}} xmm4 = mem[0],zero
+; AVX512DQ-BW-FCP-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; AVX512DQ-BW-FCP-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX512DQ-BW-FCP-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512DQ-BW-FCP-NEXT:    vpmovsxbd {{.*#+}} zmm1 = [0,2,4,6,16,18,20,22,1,3,5,7,17,19,21,23]
+; AVX512DQ-BW-FCP-NEXT:    vpermi2d %zmm2, %zmm0, %zmm1
+; AVX512DQ-BW-FCP-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512DQ-BW-FCP-NEXT:    vzeroupper
 ; AVX512DQ-BW-FCP-NEXT:    retq
   %in.vec0 = load <2 x i32>, ptr %in.vecptr0, align 64
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll
index 38623c6ce0cb..ded7c002c873 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll
@@ -94,105 +94,97 @@ define void @store_i64_stride4_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ;
 ; AVX512-LABEL: store_i64_stride4_vf2:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovaps (%rdi), %xmm0
-; AVX512-NEXT:    vmovaps (%rdx), %xmm1
-; AVX512-NEXT:    vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512-NEXT:    vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
-; AVX512-NEXT:    vmovaps %zmm0, (%r8)
+; AVX512-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512-NEXT:    vmovdqa (%rdx), %xmm1
+; AVX512-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512-NEXT:    vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512-NEXT:    vmovdqa64 %zmm2, (%r8)
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
 ;
 ; AVX512-FCP-LABEL: store_i64_stride4_vf2:
 ; AVX512-FCP:       # %bb.0:
-; AVX512-FCP-NEXT:    vmovaps (%rdi), %xmm0
-; AVX512-FCP-NEXT:    vmovaps (%rdx), %xmm1
-; AVX512-FCP-NEXT:    vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512-FCP-NEXT:    vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512-FCP-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512-FCP-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512-FCP-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
-; AVX512-FCP-NEXT:    vmovaps %zmm0, (%r8)
+; AVX512-FCP-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512-FCP-NEXT:    vmovdqa (%rdx), %xmm1
+; AVX512-FCP-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512-FCP-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512-FCP-NEXT:    vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512-FCP-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512-FCP-NEXT:    vmovdqa64 %zmm2, (%r8)
 ; AVX512-FCP-NEXT:    vzeroupper
 ; AVX512-FCP-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: store_i64_stride4_vf2:
 ; AVX512DQ:       # %bb.0:
-; AVX512DQ-NEXT:    vmovaps (%rdi), %xmm0
-; AVX512DQ-NEXT:    vmovaps (%rdx), %xmm1
-; AVX512DQ-NEXT:    vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512DQ-NEXT:    vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512DQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512DQ-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
-; AVX512DQ-NEXT:    vmovaps %zmm0, (%r8)
+; AVX512DQ-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512DQ-NEXT:    vmovdqa (%rdx), %xmm1
+; AVX512DQ-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512DQ-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512DQ-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512DQ-NEXT:    vmovdqa64 %zmm2, (%r8)
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
 ; AVX512DQ-FCP-LABEL: store_i64_stride4_vf2:
 ; AVX512DQ-FCP:       # %bb.0:
-; AVX512DQ-FCP-NEXT:    vmovaps (%rdi), %xmm0
-; AVX512DQ-FCP-NEXT:    vmovaps (%rdx), %xmm1
-; AVX512DQ-FCP-NEXT:    vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512DQ-FCP-NEXT:    vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512DQ-FCP-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-FCP-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512DQ-FCP-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
-; AVX512DQ-FCP-NEXT:    vmovaps %zmm0, (%r8)
+; AVX512DQ-FCP-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512DQ-FCP-NEXT:    vmovdqa (%rdx), %xmm1
+; AVX512DQ-FCP-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512DQ-FCP-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512DQ-FCP-NEXT:    vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512DQ-FCP-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512DQ-FCP-NEXT:    vmovdqa64 %zmm2, (%r8)
 ; AVX512DQ-FCP-NEXT:    vzeroupper
 ; AVX512DQ-FCP-NEXT:    retq
 ;
 ; AVX512BW-LABEL: store_i64_stride4_vf2:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vmovaps (%rdi), %xmm0
-; AVX512BW-NEXT:    vmovaps (%rdx), %xmm1
-; AVX512BW-NEXT:    vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512BW-NEXT:    vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512BW-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512BW-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
-; AVX512BW-NEXT:    vmovaps %zmm0, (%r8)
+; AVX512BW-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512BW-NEXT:    vmovdqa (%rdx), %xmm1
+; AVX512BW-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512BW-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512BW-NEXT:    vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512BW-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT:    vmovdqa64 %zmm2, (%r8)
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512BW-FCP-LABEL: store_i64_stride4_vf2:
 ; AVX512BW-FCP:       # %bb.0:
-; AVX512BW-FCP-NEXT:    vmovaps (%rdi), %xmm0
-; AVX512BW-FCP-NEXT:    vmovaps (%rdx), %xmm1
-; AVX512BW-FCP-NEXT:    vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512BW-FCP-NEXT:    vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512BW-FCP-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-FCP-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512BW-FCP-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
-; AVX512BW-FCP-NEXT:    vmovaps %zmm0, (%r8)
+; AVX512BW-FCP-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512BW-FCP-NEXT:    vmovdqa (%rdx), %xmm1
+; AVX512BW-FCP-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512BW-FCP-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512BW-FCP-NEXT:    vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512BW-FCP-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512BW-FCP-NEXT:    vmovdqa64 %zmm2, (%r8)
 ; AVX512BW-FCP-NEXT:    vzeroupper
 ; AVX512BW-FCP-NEXT:    retq
 ;
 ; AVX512DQ-BW-LABEL: store_i64_stride4_vf2:
 ; AVX512DQ-BW:       # %bb.0:
-; AVX512DQ-BW-NEXT:    vmovaps (%rdi), %xmm0
-; AVX512DQ-BW-NEXT:    vmovaps (%rdx), %xmm1
-; AVX512DQ-BW-NEXT:    vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512DQ-BW-NEXT:    vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512DQ-BW-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-BW-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512DQ-BW-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
-; AVX512DQ-BW-NEXT:    vmovaps %zmm0, (%r8)
+; AVX512DQ-BW-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512DQ-BW-NEXT:    vmovdqa (%rdx), %xmm1
+; AVX512DQ-BW-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512DQ-BW-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512DQ-BW-NEXT:    vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512DQ-BW-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512DQ-BW-NEXT:    vmovdqa64 %zmm2, (%r8)
 ; AVX512DQ-BW-NEXT:    vzeroupper
 ; AVX512DQ-BW-NEXT:    retq
 ;
 ; AVX512DQ-BW-FCP-LABEL: store_i64_stride4_vf2:
 ; AVX512DQ-BW-FCP:       # %bb.0:
-; AVX512DQ-BW-FCP-NEXT:    vmovaps (%rdi), %xmm0
-; AVX512DQ-BW-FCP-NEXT:    vmovaps (%rdx), %xmm1
-; AVX512DQ-BW-FCP-NEXT:    vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512DQ-BW-FCP-NEXT:    vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512DQ-BW-FCP-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-BW-FCP-NEXT:    vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512DQ-BW-FCP-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
-; AVX512DQ-BW-FCP-NEXT:    vmovaps %zmm0, (%r8)
+; AVX512DQ-BW-FCP-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512DQ-BW-FCP-NEXT:    vmovdqa (%rdx), %xmm1
+; AVX512DQ-BW-FCP-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512DQ-BW-FCP-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512DQ-BW-FCP-NEXT:    vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512DQ-BW-FCP-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512DQ-BW-FCP-NEXT:    vmovdqa64 %zmm2, (%r8)
 ; AVX512DQ-BW-FCP-NEXT:    vzeroupper
 ; AVX512DQ-BW-FCP-NEXT:    retq
   %in.vec0 = load <2 x i64>, ptr %in.vecptr0, align 64
-- 
cgit v1.2.3


From f1ca2a09671e4d4acc2bea362b39268ed7883b6d Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 12 Mar 2024 10:56:14 -0700
Subject: [ELF] Add --compress-section to compress matched non-SHF_ALLOC
 sections

--compress-sections <section-glib>=[none|zlib|zstd] is similar to
--compress-debug-sections but applies to broader sections without the
SHF_ALLOC flag. lld will report an error if a SHF_ALLOC section is
matched. An interesting use case is to compress `.strtab`/`.symtab`,
which consume a significant portion of the file size (15.1% for a
release build of Clang).

An older revision is available at https://reviews.llvm.org/D154641 .
This patch focuses on non-allocated sections for safety. Moving
`maybeCompress` as D154641 does not handle STT_SECTION symbols for
`-r --compress-debug-sections=zlib` (see `relocatable-section-symbol.s`
from #66804).

Since different output sections may use different compression
algorithms, we need CompressedData::type to generalize
config->compressDebugSections.

GNU ld feature request: https://sourceware.org/bugzilla/show_bug.cgi?id=27452

Link: https://discourse.llvm.org/t/rfc-compress-arbitrary-sections-with-ld-lld-compress-sections/71674

Pull Request: https://github.com/llvm/llvm-project/pull/84855
---
 lld/ELF/Config.h                              |  4 +-
 lld/ELF/Driver.cpp                            | 24 ++++++-
 lld/ELF/Options.td                            |  4 ++
 lld/ELF/OutputSections.cpp                    | 43 +++++++++----
 lld/ELF/OutputSections.h                      |  8 ++-
 lld/docs/ReleaseNotes.rst                     |  4 ++
 lld/docs/ld.lld.1                             |  4 ++
 lld/test/ELF/compress-sections-err.s          |  3 +
 lld/test/ELF/compress-sections-special.s      | 31 +++++++++
 lld/test/ELF/compress-sections.s              | 91 +++++++++++++++++++++++++++
 lld/test/ELF/linkerscript/compress-sections.s | 62 ++++++++++++++++++
 11 files changed, 259 insertions(+), 19 deletions(-)
 create mode 100644 lld/test/ELF/compress-sections-special.s
 create mode 100644 lld/test/ELF/compress-sections.s
 create mode 100644 lld/test/ELF/linkerscript/compress-sections.s

diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 691ebfc07432..9ae01eb90fa4 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -222,7 +222,9 @@ struct Config {
   CGProfileSortKind callGraphProfileSort;
   bool checkSections;
   bool checkDynamicRelocs;
-  llvm::DebugCompressionType compressDebugSections;
+  std::optional<llvm::DebugCompressionType> compressDebugSections;
+  llvm::SmallVector<std::pair<llvm::GlobPattern, llvm::DebugCompressionType>, 0>
+      compressSections;
   bool cref;
   llvm::SmallVector<std::pair<llvm::GlobPattern, uint64_t>, 0>
       deadRelocInNonAlloc;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index de4b2e345ac9..2439d141fb66 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1224,9 +1224,10 @@ static void readConfigs(opt::InputArgList &args) {
   config->checkSections =
       args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
   config->chroot = args.getLastArgValue(OPT_chroot);
-  config->compressDebugSections = getCompressionType(
-      args.getLastArgValue(OPT_compress_debug_sections, "none"),
-      "--compress-debug-sections");
+  if (auto *arg = args.getLastArg(OPT_compress_debug_sections)) {
+    config->compressDebugSections =
+        getCompressionType(arg->getValue(), "--compress-debug-sections");
+  }
   config->cref = args.hasArg(OPT_cref);
   config->optimizeBBJumps =
       args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false);
@@ -1516,6 +1517,23 @@ static void readConfigs(opt::InputArgList &args) {
     }
   }
 
+  for (opt::Arg *arg : args.filtered(OPT_compress_sections)) {
+    SmallVector<StringRef, 0> fields;
+    StringRef(arg->getValue()).split(fields, '=');
+    if (fields.size() != 2 || fields[1].empty()) {
+      error(arg->getSpelling() +
+            ": parse error, not 'section-glob=[none|zlib|zstd]'");
+      continue;
+    }
+    auto type = getCompressionType(fields[1], arg->getSpelling());
+    if (Expected<GlobPattern> pat = GlobPattern::create(fields[0])) {
+      config->compressSections.emplace_back(std::move(*pat), type);
+    } else {
+      error(arg->getSpelling() + ": " + toString(pat.takeError()));
+      continue;
+    }
+  }
+
   for (opt::Arg *arg : args.filtered(OPT_z)) {
     std::pair<StringRef, StringRef> option =
         StringRef(arg->getValue()).split('=');
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c10a73e2d9c3..3819b86238ea 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -67,6 +67,10 @@ defm compress_debug_sections:
   Eq<"compress-debug-sections", "Compress DWARF debug sections">,
   MetaVarName<"[none,zlib,zstd]">;
 
+defm compress_sections: EEq<"compress-sections",
+  "Compress non-SHF_ALLOC output sections matching <section-glob>">,
+  MetaVarName<"<section-glob>=[none|zlib|zstd]">;
+
 defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"<symbol>=<value>">;
 
 defm optimize_bb_jumps: BB<"optimize-bb-jumps",
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index ee9374186787..55e6a14f103e 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -326,17 +326,30 @@ static SmallVector<uint8_t, 0> deflateShard(ArrayRef<uint8_t> in, int level,
 }
 #endif
 
-// Compress section contents if this section contains debug info.
+// Compress certain non-SHF_ALLOC sections:
+//
+// * (if --compress-debug-sections is specified) non-empty .debug_* sections
+// * (if --compress-sections is specified) matched sections
 template <class ELFT> void OutputSection::maybeCompress() {
   using Elf_Chdr = typename ELFT::Chdr;
   (void)sizeof(Elf_Chdr);
 
-  // Compress only DWARF debug sections.
-  if (config->compressDebugSections == DebugCompressionType::None ||
-      (flags & SHF_ALLOC) || !name.starts_with(".debug_") || size == 0)
+  DebugCompressionType ctype = DebugCompressionType::None;
+  for (auto &[glob, t] : config->compressSections)
+    if (glob.match(name))
+      ctype = t;
+  if (!(flags & SHF_ALLOC) && config->compressDebugSections &&
+      name.starts_with(".debug_") && size)
+    ctype = *config->compressDebugSections;
+  if (ctype == DebugCompressionType::None)
+    return;
+  if (flags & SHF_ALLOC) {
+    errorOrWarn("--compress-sections: section '" + name +
+                "' with the SHF_ALLOC flag cannot be compressed");
     return;
+  }
 
-  llvm::TimeTraceScope timeScope("Compress debug sections");
+  llvm::TimeTraceScope timeScope("Compress sections");
   compressed.uncompressedSize = size;
   auto buf = std::make_unique<uint8_t[]>(size);
   // Write uncompressed data to a temporary zero-initialized buffer.
@@ -344,14 +357,21 @@ template <class ELFT> void OutputSection::maybeCompress() {
     parallel::TaskGroup tg;
     writeTo<ELFT>(buf.get(), tg);
   }
+  // The generic ABI specifies "The sh_size and sh_addralign fields of the
+  // section header for a compressed section reflect the requirements of the
+  // compressed section." However, 1-byte alignment has been wildly accepted
+  // and utilized for a long time. Removing alignment padding is particularly
+  // useful when there are many compressed output sections.
+  addralign = 1;
 
 #if LLVM_ENABLE_ZSTD
   // Use ZSTD's streaming compression API which permits parallel workers working
   // on the stream. See http://facebook.github.io/zstd/zstd_manual.html
   // "Streaming compression - HowTo".
-  if (config->compressDebugSections == DebugCompressionType::Zstd) {
+  if (ctype == DebugCompressionType::Zstd) {
     // Allocate a buffer of half of the input size, and grow it by 1.5x if
     // insufficient.
+    compressed.type = ELFCOMPRESS_ZSTD;
     compressed.shards = std::make_unique<SmallVector<uint8_t, 0>[]>(1);
     SmallVector<uint8_t, 0> &out = compressed.shards[0];
     out.resize_for_overwrite(std::max<size_t>(size / 2, 32));
@@ -424,6 +444,7 @@ template <class ELFT> void OutputSection::maybeCompress() {
   }
   size += 4; // checksum
 
+  compressed.type = ELFCOMPRESS_ZLIB;
   compressed.shards = std::move(shardsOut);
   compressed.numShards = numShards;
   compressed.checksum = checksum;
@@ -450,20 +471,18 @@ void OutputSection::writeTo(uint8_t *buf, parallel::TaskGroup &tg) {
   if (type == SHT_NOBITS)
     return;
 
-  // If --compress-debug-section is specified and if this is a debug section,
-  // we've already compressed section contents. If that's the case,
-  // just write it down.
+  // If the section is compressed due to
+  // --compress-debug-section/--compress-sections, the content is already known.
   if (compressed.shards) {
     auto *chdr = reinterpret_cast<typename ELFT::Chdr *>(buf);
+    chdr->ch_type = compressed.type;
     chdr->ch_size = compressed.uncompressedSize;
     chdr->ch_addralign = addralign;
     buf += sizeof(*chdr);
-    if (config->compressDebugSections == DebugCompressionType::Zstd) {
-      chdr->ch_type = ELFCOMPRESS_ZSTD;
+    if (compressed.type == ELFCOMPRESS_ZSTD) {
       memcpy(buf, compressed.shards[0].data(), compressed.shards[0].size());
       return;
     }
-    chdr->ch_type = ELFCOMPRESS_ZLIB;
 
     // Compute shard offsets.
     auto offsets = std::make_unique<size_t[]>(compressed.numShards);
diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h
index c7931471a6ed..421a0181feb5 100644
--- a/lld/ELF/OutputSections.h
+++ b/lld/ELF/OutputSections.h
@@ -23,6 +23,7 @@ struct PhdrEntry;
 
 struct CompressedData {
   std::unique_ptr<SmallVector<uint8_t, 0>[]> shards;
+  uint32_t type = 0;
   uint32_t numShards = 0;
   uint32_t checksum = 0;
   uint64_t uncompressedSize;
@@ -116,12 +117,13 @@ public:
   void sortInitFini();
   void sortCtorsDtors();
 
+  // Used for implementation of --compress-debug-sections and
+  // --compress-sections.
+  CompressedData compressed;
+
 private:
   SmallVector<InputSection *, 0> storage;
 
-  // Used for implementation of --compress-debug-sections option.
-  CompressedData compressed;
-
   std::array<uint8_t, 4> getFiller();
 };
 
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 6f60efd87c97..97ed06048910 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -26,6 +26,10 @@ Non-comprehensive list of changes in this release
 ELF Improvements
 ----------------
 
+* ``--compress-sections <section-glib>=[none|zlib|zstd]`` is added to compress
+  matched output sections without the ``SHF_ALLOC`` flag.
+  (`#84855 <https://github.com/llvm/llvm-project/pull/84855>`_)
+
 Breaking changes
 ----------------
 
diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index e4d39e47f5c5..e759776c8d55 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -164,6 +164,10 @@ to set the compression level to 6.
 The compression level is 5.
 .El
 .Pp
+.It Fl -compress-sections Ns = Ns Ar section-glob=[none|zlib|zstd]
+Compress output sections that match the glob and do not have the SHF_ALLOC flag.
+This is like a generalized
+.Cm --compress-debug-sections.
 .It Fl -cref
 Output cross reference table. If
 .Fl Map
diff --git a/lld/test/ELF/compress-sections-err.s b/lld/test/ELF/compress-sections-err.s
index 097803807083..1b46aea12e9c 100644
--- a/lld/test/ELF/compress-sections-err.s
+++ b/lld/test/ELF/compress-sections-err.s
@@ -5,8 +5,11 @@
 # RUN: ld.lld %t.o --compress-debug-sections=zlib --compress-debug-sections=none -o /dev/null 2>&1 | count 0
 # RUN: not ld.lld %t.o --compress-debug-sections=zlib -o /dev/null 2>&1 | \
 # RUN:   FileCheck %s --implicit-check-not=error:
+# RUN: not ld.lld %t.o --compress-sections=foo=zlib -o /dev/null 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=CHECK2 --implicit-check-not=error:
 
 # CHECK: error: --compress-debug-sections: LLVM was not built with LLVM_ENABLE_ZLIB or did not find zlib at build time
+# CHECK2: error: --compress-sections: LLVM was not built with LLVM_ENABLE_ZLIB or did not find zlib at build time
 
 .globl _start
 _start:
diff --git a/lld/test/ELF/compress-sections-special.s b/lld/test/ELF/compress-sections-special.s
new file mode 100644
index 000000000000..80c61fe626a4
--- /dev/null
+++ b/lld/test/ELF/compress-sections-special.s
@@ -0,0 +1,31 @@
+# REQUIRES: x86, zlib
+
+# RUN: rm -rf %t && mkdir %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o a.o
+# RUN: ld.lld -pie a.o --compress-sections .strtab=zlib --compress-sections .symtab=zlib -o out
+# RUN: llvm-readelf -Ss -x .strtab out 2>&1 | FileCheck %s
+
+# CHECK:      nonalloc0  PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00     0 0  1
+# CHECK:      .symtab    SYMTAB   0000000000000000 [[#%x,]] [[#%x,]] 18  C 12 3  1
+# CHECK-NEXT: .shstrtab  STRTAB   0000000000000000 [[#%x,]] [[#%x,]] 00     0 0  1
+# CHECK-NEXT: .strtab    STRTAB   0000000000000000 [[#%x,]] [[#%x,]] 00  C  0 0  1
+
+## TODO Add compressed SHT_STRTAB/SHT_SYMTAB support to llvm-readelf
+# CHECK:      warning: {{.*}}: unable to get the string table for the SHT_SYMTAB section: SHT_STRTAB string table section
+
+# CHECK:      Hex dump of section '.strtab':
+# CHECK-NEXT: 01000000 00000000 1a000000 00000000
+# CHECK-NEXT: 01000000 00000000 {{.*}}
+
+# RUN: not ld.lld -shared a.o --compress-sections .dynstr=zlib 2>&1 | FileCheck %s --check-prefix=ERR-ALLOC
+# ERR-ALLOC: error: --compress-sections: section '.dynstr' with the SHF_ALLOC flag cannot be compressed
+
+.globl _start, g0, g1
+_start:
+l0:
+g0:
+g1:
+
+.section nonalloc0,""
+.quad .text+1
+.quad .text+2
diff --git a/lld/test/ELF/compress-sections.s b/lld/test/ELF/compress-sections.s
new file mode 100644
index 000000000000..59b5408c9624
--- /dev/null
+++ b/lld/test/ELF/compress-sections.s
@@ -0,0 +1,91 @@
+# REQUIRES: x86, zlib, zstd
+
+# RUN: rm -rf %t && mkdir %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o a.o
+# RUN: ld.lld -pie a.o -o out --compress-sections '*0=zlib' --compress-sections '*0=none' --compress-sections 'nomatch=none'
+# RUN: llvm-readelf -SrsX out | FileCheck %s --check-prefix=CHECK1
+
+# CHECK1:      Name       Type          Address     Off      Size     ES Flg Lk Inf Al
+# CHECK1:      foo0       PROGBITS [[#%x,FOO0:]]    [[#%x,]] [[#%x,]] 00 A    0   0  8
+# CHECK1-NEXT: foo1       PROGBITS [[#%x,FOO1:]]    [[#%x,]] [[#%x,]] 00 A    0   0  8
+# CHECK1-NEXT: .text      PROGBITS [[#%x,TEXT:]]    [[#%x,]] [[#%x,]] 00 AX   0   0  4
+# CHECK1:      nonalloc0  PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00      0   0  8
+# CHECK1-NEXT: nonalloc1  PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00      0   0  8
+# CHECK1-NEXT: .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MS   0   0  1
+
+# CHECK1: 0000000000000010  0 NOTYPE  LOCAL  DEFAULT   [[#]] (nonalloc0) sym0
+# CHECK1: 0000000000000008  0 NOTYPE  LOCAL  DEFAULT   [[#]] (nonalloc1) sym1
+
+# RUN: ld.lld -pie a.o --compress-sections '*c0=zlib' --compress-sections .debug_str=zstd -o out2
+# RUN: llvm-readelf -SrsX -x nonalloc0 -x .debug_str out2 | FileCheck %s --check-prefix=CHECK2
+
+# CHECK2:      Name       Type          Address     Off      Size     ES Flg Lk Inf Al
+# CHECK2:      foo0       PROGBITS [[#%x,FOO0:]]    [[#%x,]] [[#%x,]] 00 A    0   0  8
+# CHECK2-NEXT: foo1       PROGBITS [[#%x,FOO1:]]    [[#%x,]] [[#%x,]] 00 A    0   0  8
+# CHECK2-NEXT: .text      PROGBITS [[#%x,TEXT:]]    [[#%x,]] [[#%x,]] 00 AX   0   0  4
+# CHECK2:      nonalloc0  PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 C    0   0  1
+# CHECK2-NEXT: nonalloc1  PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00      0   0  8
+# CHECK2-NEXT: .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MSC  0   0  1
+
+# CHECK2: 0000000000000010  0 NOTYPE  LOCAL  DEFAULT   [[#]] (nonalloc0) sym0
+# CHECK2: 0000000000000008  0 NOTYPE  LOCAL  DEFAULT   [[#]] (nonalloc1) sym1
+
+# CHECK2:      Hex dump of section 'nonalloc0':
+## zlib with ch_size=0x10
+# CHECK2-NEXT: 01000000 00000000 10000000 00000000
+# CHECK2-NEXT: 01000000 00000000 {{.*}}
+# CHECK2:      Hex dump of section '.debug_str':
+## zstd with ch_size=0x38
+# CHECK2-NEXT: 02000000 00000000 38000000 00000000
+# CHECK2-NEXT: 01000000 00000000 {{.*}}
+
+## --compress-debug-sections=none takes precedence.
+# RUN: ld.lld a.o --compress-debug-sections=none --compress-sections .debug_str=zstd -o out3
+# RUN: llvm-readelf -S out3 | FileCheck %s --check-prefix=CHECK3
+
+# CHECK3:      .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MS   0   0  1
+
+# RUN: not ld.lld a.o --compress-sections '*0=zlib' 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=ERR-ALLOC --implicit-check-not=error:
+# ERR-ALLOC: error: --compress-sections: section 'foo0' with the SHF_ALLOC flag cannot be compressed
+
+# RUN: not ld.lld --compress-sections=foo a.o 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=ERR1 --implicit-check-not=error:
+# ERR1:      error: --compress-sections: parse error, not 'section-glob=[none|zlib|zstd]'
+
+# RUN: not ld.lld --compress-sections 'a[=zlib' a.o 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=ERR2 --implicit-check-not=error:
+# ERR2:      error: --compress-sections: invalid glob pattern, unmatched '['
+
+# RUN: not ld.lld a.o --compress-sections='.debug*=zlib-gabi' --compress-sections='.debug*=' 2>&1 | \
+# RUN:   FileCheck -check-prefix=ERR3 %s
+# ERR3:      unknown --compress-sections value: zlib-gabi
+# ERR3-NEXT: --compress-sections: parse error, not 'section-glob=[none|zlib|zstd]'
+
+.globl _start
+_start:
+  ret
+
+.section foo0,"a"
+.balign 8
+.quad .text-.
+.quad .text-.
+.section foo1,"a"
+.balign 8
+.quad .text-.
+.quad .text-.
+.section nonalloc0,""
+.balign 8
+.quad .text+1
+.quad .text+2
+sym0:
+.section nonalloc1,""
+.balign 8
+.quad 42
+sym1:
+
+.section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+  .asciz "AAAAAAAAAAAAAAAAAAAAAAAAAAA"
+.Linfo_string1:
+  .asciz "BBBBBBBBBBBBBBBBBBBBBBBBBBB"
diff --git a/lld/test/ELF/linkerscript/compress-sections.s b/lld/test/ELF/linkerscript/compress-sections.s
new file mode 100644
index 000000000000..9b4574a1778c
--- /dev/null
+++ b/lld/test/ELF/linkerscript/compress-sections.s
@@ -0,0 +1,62 @@
+# REQUIRES: x86, zlib
+
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o
+# RUN: ld.lld -T a.lds a.o --compress-sections nonalloc=zlib --compress-sections str=zlib -o out
+# RUN: llvm-readelf -SsXz -p str out | FileCheck %s
+
+# CHECK:      Name     Type            Address   Off      Size     ES Flg  Lk Inf Al
+# CHECK:      nonalloc PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00   C   0   0  1
+# CHECK-NEXT: str      PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MSC   0   0  1
+
+# CHECK:      0000000000000000  0 NOTYPE  GLOBAL DEFAULT [[#]] (nonalloc) nonalloc_start
+# CHECK:      0000000000000023  0 NOTYPE  GLOBAL DEFAULT [[#]] (nonalloc) nonalloc_end
+# CHECK:      String dump of section 'str':
+# CHECK-NEXT: [     0] AAA
+# CHECK-NEXT: [     4] BBB
+
+## TODO The uncompressed size of 'nonalloc' is dependent on linker script
+## commands, which is not handled. We should report an error.
+# RUN: ld.lld -T b.lds a.o --compress-sections nonalloc=zlib
+
+#--- a.s
+.globl _start
+_start:
+  ret
+
+.section nonalloc0,""
+.balign 8
+.quad .text
+.quad .text
+.section nonalloc1,""
+.balign 8
+.quad 42
+
+.section str,"MS",@progbits,1
+  .asciz "AAA"
+  .asciz "BBB"
+
+#--- a.lds
+SECTIONS {
+  .text : { *(.text) }
+  c = SIZEOF(.text);
+  b = c+1;
+  a = b+1;
+  nonalloc : {
+    nonalloc_start = .;
+## In general, using data commands is error-prone. This case is correct, though.
+    *(nonalloc*) QUAD(SIZEOF(.text))
+    . += a;
+    nonalloc_end = .;
+  }
+  str : { *(str) }
+}
+
+#--- b.lds
+SECTIONS {
+  nonalloc : { *(nonalloc*) . += a; }
+  .text : { *(.text) }
+  a = b+1;
+  b = c+1;
+  c = SIZEOF(.text);
+}
-- 
cgit v1.2.3


From 536e0ebaaa842471ae91bbda4f8cc1821690861e Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Tue, 12 Mar 2024 11:06:23 -0700
Subject: Remove XFAIL from tests passing on green dragon

---
 cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp | 1 -
 cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp   | 1 -
 2 files changed, 2 deletions(-)

diff --git a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp
index 3f11ae018fc8..5b6647c0631c 100644
--- a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp
+++ b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp
@@ -2,7 +2,6 @@
 // RUN: %clangxx %target_itanium_abi_host_triple %t -o %t.out
 // RUN: %test_debuginfo %s %t.out
 // XFAIL: gdb-clang-incompatibility
-// XFAIL: system-darwin
 
 // DEBUGGER: delete breakpoints
 // DEBUGGER: break static-member.cpp:33
diff --git a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp
index 57316dfd6404..29dd84dc8325 100644
--- a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp
+++ b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp
@@ -2,7 +2,6 @@
 // RUN: %clangxx %target_itanium_abi_host_triple %t -o %t.out
 // RUN: %test_debuginfo %s %t.out
 // XFAIL: !system-darwin && gdb-clang-incompatibility
-// XFAIL: system-darwin
 // DEBUGGER: delete breakpoints
 // DEBUGGER: break static-member.cpp:33
 // DEBUGGER: r
-- 
cgit v1.2.3


From 42ecccfe346daf342b6c46a6a471ba5ed99b1139 Mon Sep 17 00:00:00 2001
From: amilendra <amilendra.kodithuwakku@arm.com>
Date: Tue, 12 Mar 2024 18:17:13 +0000
Subject: [libcxx] Fix incorrect type in the has-1024-bit-atomics feature test
 (#84904)

---
 libcxx/utils/libcxx/test/features.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py
index 4fd8798b794a..872bff372b3d 100644
--- a/libcxx/utils/libcxx/test/features.py
+++ b/libcxx/utils/libcxx/test/features.py
@@ -176,7 +176,7 @@ DEFAULT_FEATURES = [
             cfg,
             """
             #include <atomic>
-            struct Large { int storage[1024/8]; };
+            struct Large { char storage[1024/8]; };
             std::atomic<Large> x;
             int main(int, char**) { (void)x.load(); (void)x.is_lock_free(); return 0; }
           """,
-- 
cgit v1.2.3


From a843f26a77dee7900891b6748d43cf8d4e423bfe Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Tue, 12 Mar 2024 11:25:12 -0700
Subject: [NFC] SLVectorizer comparator refactoring that preserves behavior
 (#84966)

Spinning off from #79321 / 35f4592 - looked like the comparator could be
simplified & made more clear/less risk of leaving hidden bugs.
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 87 ++++++++++++-------------
 1 file changed, 43 insertions(+), 44 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7b99c3ac8c55..6ef46e3c5258 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16614,36 +16614,11 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
     if (Opcodes1.size() > Opcodes2.size())
       return false;
     for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
-      // Undefs are compatible with any other value.
-      if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I])) {
-        if (isa<UndefValue>(Opcodes1[I]) && isa<UndefValue>(Opcodes2[I]))
-          continue;
-        if (isa<Instruction>(Opcodes1[I])) {
-          assert(isa<UndefValue>(Opcodes2[I]) && "Expected 2nd undef value");
-          return true;
-        }
-        if (isa<Instruction>(Opcodes2[I])) {
-          assert(isa<UndefValue>(Opcodes1[I]) && "Expected 1st undef value");
-          return false;
-        }
-        if (isa<Constant>(Opcodes1[I]) && !isa<UndefValue>(Opcodes1[I])) {
-          assert(isa<UndefValue>(Opcodes2[I]) && "Expected 2nd undef value");
-          return true;
-        }
-        if (isa<Constant>(Opcodes2[I]) && !isa<UndefValue>(Opcodes2[I])) {
-          assert(isa<UndefValue>(Opcodes1[I]) && "Expected 1st undef value");
-          return false;
-        }
-        if (!isa<UndefValue>(Opcodes2[I])) {
-          assert(isa<UndefValue>(Opcodes1[I]) && "Expected 1st undef value");
-          return false;
-        }
-        assert(!isa<UndefValue>(Opcodes1[I]) && isa<UndefValue>(Opcodes2[I]) &&
-               "Expected 1st non-undef and 2nd undef value");
-        return true;
-      }
-      if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
-        if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
+      {
+        // Instructions come first.
+        auto *I1 = dyn_cast<Instruction>(Opcodes1[I]);
+        auto *I2 = dyn_cast<Instruction>(Opcodes2[I]);
+        if (I1 && I2) {
           DomTreeNodeBase<BasicBlock> *NodeI1 = DT->getNode(I1->getParent());
           DomTreeNodeBase<BasicBlock> *NodeI2 = DT->getNode(I2->getParent());
           if (!NodeI1)
@@ -16660,20 +16635,44 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
             continue;
           return I1->getOpcode() < I2->getOpcode();
         }
-      if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
-        continue;
-      if (isa<Instruction>(Opcodes1[I]) && !isa<Instruction>(Opcodes2[I]))
-        return true;
-      if (!isa<Instruction>(Opcodes1[I]) && isa<Instruction>(Opcodes2[I]))
-        return false;
-      if (isa<Constant>(Opcodes1[I]) && !isa<Constant>(Opcodes2[I]))
-        return true;
-      if (!isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
-        return false;
-      if (Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID())
-        return true;
-      if (Opcodes1[I]->getValueID() > Opcodes2[I]->getValueID())
-        return false;
+        if (I1)
+          return true;
+        if (I2)
+          return false;
+      }
+      {
+        // Non-undef constants come next.
+        bool C1 = isa<Constant>(Opcodes1[I]) && !isa<UndefValue>(Opcodes1[I]);
+        bool C2 = isa<Constant>(Opcodes2[I]) && !isa<UndefValue>(Opcodes2[I]);
+        if (C1 && C2)
+          continue;
+        if (C1)
+          return true;
+        if (C2)
+          return false;
+      }
+      bool U1 = isa<UndefValue>(Opcodes1[I]);
+      bool U2 = isa<UndefValue>(Opcodes2[I]);
+      {
+        // Non-constant non-instructions come next.
+        if (!U1 && !U2) {
+          auto ValID1 = Opcodes1[I]->getValueID();
+          auto ValID2 = Opcodes2[I]->getValueID();
+          if (ValID1 == ValID2)
+            continue;
+          if (ValID1 < ValID2)
+            return true;
+          if (ValID1 > ValID2)
+            return false;
+        }
+        if (!U1)
+          return true;
+        if (!U2)
+          return false;
+      }
+      // Undefs come last.
+      assert(U1 && U2);
+      continue;
     }
     return false;
   };
-- 
cgit v1.2.3


From 377da51546b2f514c3e90fca351004a2cf3f1eed Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Mon, 11 Mar 2024 22:52:20 -0500
Subject: [InstCombine] Add test for detecting `(x ^ -x)` as a ~Mask; NFC

---
 .../Transforms/InstCombine/icmp-and-lowbit-mask.ll    | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
index 640a95b05616..903d70685ab5 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
@@ -453,6 +453,25 @@ define i1 @src_is_notmask_shl(i8 %x_in, i8 %y, i1 %cond) {
   ret i1 %r
 }
 
+define i1 @src_is_notmask_x_xor_neg_x(i8 %x_in, i8 %y, i1 %cond) {
+; CHECK-LABEL: @src_is_notmask_x_xor_neg_x(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[NEG_Y:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT:    [[NOTMASK0:%.*]] = xor i8 [[NEG_Y]], [[Y]]
+; CHECK-NEXT:    [[NOTMASK:%.*]] = select i1 [[COND:%.*]], i8 [[NOTMASK0]], i8 -8
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %neg_y = sub i8 0, %y
+  %nmask0 = xor i8 %y, %neg_y
+  %notmask = select i1 %cond, i8 %nmask0, i8 -8
+  %and = and i8 %x, %notmask
+  %r = icmp eq i8 %and, 0
+  ret i1 %r
+}
+
 define i1 @src_is_notmask_shl_fail_multiuse_invert(i8 %x_in, i8 %y, i1 %cond) {
 ; CHECK-LABEL: @src_is_notmask_shl_fail_multiuse_invert(
 ; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 122
-- 
cgit v1.2.3


From 5ca325e49cedee2aa5e80581ba95dcab56292c32 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Mon, 11 Mar 2024 22:52:29 -0500
Subject: [InstCombine] Detect `(x ^ -x)` as a ~Mask

Proof: https://alive2.llvm.org/ce/z/TAFmPw

This is a lemma for clearing up some of the regressions that #84688
causes.

Closes #84868
---
 llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp  | 7 +++++--
 llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll | 7 +++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index e71f3e113b96..0dce0077bf15 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4113,9 +4113,12 @@ static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
     if (match(V, m_Not(m_Value(X))))
       return isMaskOrZero(X, !Not, Q, Depth);
 
+    // (X ^ -X) is a ~Mask
+    if (Not)
+      return match(V, m_c_Xor(m_Value(X), m_Neg(m_Deferred(X))));
     // (X ^ (X - 1)) is a Mask
-    return !Not &&
-           match(V, m_c_Xor(m_Value(X), m_Add(m_Deferred(X), m_AllOnes())));
+    else
+      return match(V, m_c_Xor(m_Value(X), m_Add(m_Deferred(X), m_AllOnes())));
   case Instruction::Select:
     // c ? Mask0 : Mask1 is a Mask.
     return isMaskOrZero(I->getOperand(1), Not, Q, Depth) &&
diff --git a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
index 903d70685ab5..070609228958 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
@@ -456,11 +456,10 @@ define i1 @src_is_notmask_shl(i8 %x_in, i8 %y, i1 %cond) {
 define i1 @src_is_notmask_x_xor_neg_x(i8 %x_in, i8 %y, i1 %cond) {
 ; CHECK-LABEL: @src_is_notmask_x_xor_neg_x(
 ; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT:    [[NEG_Y:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT:    [[NEG_Y:%.*]] = add i8 [[Y:%.*]], -1
 ; CHECK-NEXT:    [[NOTMASK0:%.*]] = xor i8 [[NEG_Y]], [[Y]]
-; CHECK-NEXT:    [[NOTMASK:%.*]] = select i1 [[COND:%.*]], i8 [[NOTMASK0]], i8 -8
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[COND:%.*]], i8 [[NOTMASK0]], i8 7
+; CHECK-NEXT:    [[R:%.*]] = icmp ule i8 [[X]], [[TMP3]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
-- 
cgit v1.2.3


From 9ac03158987802706110ef465c4b6a7553cc4b86 Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Tue, 12 Mar 2024 18:28:30 +0000
Subject: Add comment to assert from a843f26

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6ef46e3c5258..b8b67609d755 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16671,7 +16671,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
           return false;
       }
       // Undefs come last.
-      assert(U1 && U2);
+      assert(U1 && U2 && "The only thing left should be undef & undef.");
       continue;
     }
     return false;
-- 
cgit v1.2.3


From f5334f5da5166ac26fb24ca44669b3d425f0d131 Mon Sep 17 00:00:00 2001
From: Jonathan Peyton <jonathan.l.peyton@intel.com>
Date: Tue, 12 Mar 2024 11:36:19 -0700
Subject: [OpenMP] Add debug checks for divide by zero (#83300)

---
 openmp/runtime/src/kmp_affinity.cpp |  2 ++
 openmp/runtime/src/kmp_sched.cpp    | 23 ++++++++++++++++++++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index b79b57eafd6a..c3ee4de75a23 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -1777,6 +1777,8 @@ static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) {
       __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_PU);
     else
       __kmp_nThreadsPerCore = 1; // no CORE found
+    if (__kmp_nThreadsPerCore == 0)
+      __kmp_nThreadsPerCore = 1;
     __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
     if (nCoresPerPkg == 0)
       nCoresPerPkg = 1; // to prevent possible division by 0
diff --git a/openmp/runtime/src/kmp_sched.cpp b/openmp/runtime/src/kmp_sched.cpp
index 53182bef5873..4d764e441f28 100644
--- a/openmp/runtime/src/kmp_sched.cpp
+++ b/openmp/runtime/src/kmp_sched.cpp
@@ -52,6 +52,7 @@ char const *traits_t<long>::spec = "ld";
     } else if (i > 0) {                                                        \
       t = (u - l) / i + 1;                                                     \
     } else {                                                                   \
+      KMP_DEBUG_ASSERT(i != 0);                                                \
       t = (l - u) / (-i) + 1;                                                  \
     }                                                                          \
     KMP_COUNT_VALUE(stat, t);                                                  \
@@ -284,6 +285,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
     // upper-lower can exceed the limit of signed type
     trip_count = (UT)(*pupper - *plower) / incr + 1;
   } else {
+    KMP_DEBUG_ASSERT(incr != 0);
     trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
   }
 
@@ -318,6 +320,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
       if (plastiter != NULL)
         *plastiter = (tid == trip_count - 1);
     } else {
+      KMP_DEBUG_ASSERT(nth != 0);
       if (__kmp_static == kmp_sch_static_balanced) {
         UT small_chunk = trip_count / nth;
         UT extras = trip_count % nth;
@@ -358,6 +361,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
   case kmp_sch_static_chunked: {
     ST span;
     UT nchunks;
+    KMP_DEBUG_ASSERT(chunk != 0);
     if (chunk < 1)
       chunk = 1;
     else if ((UT)chunk > trip_count)
@@ -383,6 +387,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
   }
   case kmp_sch_static_balanced_chunked: {
     T old_upper = *pupper;
+    KMP_DEBUG_ASSERT(nth != 0);
     // round up to make sure the chunk is enough to cover all iterations
     UT span = (trip_count + nth - 1) / nth;
 
@@ -398,8 +403,10 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
     } else if (*pupper < old_upper)
       *pupper = old_upper;
 
-    if (plastiter != NULL)
+    if (plastiter != NULL) {
+      KMP_DEBUG_ASSERT(chunk != 0);
       *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
+    }
     break;
   }
   default:
@@ -417,6 +424,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
     // Calculate chunk in case it was not specified; it is specified for
     // kmp_sch_static_chunked
     if (schedtype == kmp_sch_static) {
+      KMP_DEBUG_ASSERT(nth != 0);
       cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
     }
     // 0 - "static" schedule
@@ -547,6 +555,7 @@ static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
     // upper-lower can exceed the limit of signed type
     trip_count = (UT)(*pupper - *plower) / incr + 1;
   } else {
+    KMP_DEBUG_ASSERT(incr != 0);
     trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
   }
 
@@ -568,6 +577,7 @@ static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
       *plastiter = (tid == 0 && team_id == trip_count - 1);
   } else {
     // Get the team's chunk first (each team gets at most one chunk)
+    KMP_DEBUG_ASSERT(nteams != 0);
     if (__kmp_static == kmp_sch_static_balanced) {
       UT chunkD = trip_count / nteams;
       UT extras = trip_count % nteams;
@@ -619,6 +629,7 @@ static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
       // upper-lower can exceed the limit of signed type
       trip_count = (UT)(*pupperDist - *plower) / incr + 1;
     } else {
+      KMP_DEBUG_ASSERT(incr != 0);
       trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
     }
     KMP_DEBUG_ASSERT(trip_count);
@@ -637,6 +648,7 @@ static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
           if (*plastiter != 0 && !(tid == trip_count - 1))
             *plastiter = 0;
       } else {
+        KMP_DEBUG_ASSERT(nth != 0);
         if (__kmp_static == kmp_sch_static_balanced) {
           UT chunkL = trip_count / nth;
           UT extras = trip_count % nth;
@@ -684,9 +696,11 @@ static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
       *pstride = span * nth;
       *plower = *plower + (span * tid);
       *pupper = *plower + span - incr;
-      if (plastiter != NULL)
+      if (plastiter != NULL) {
+        KMP_DEBUG_ASSERT(chunk != 0);
         if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
           *plastiter = 0;
+      }
       break;
     }
     default:
@@ -809,6 +823,7 @@ static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
     // upper-lower can exceed the limit of signed type
     trip_count = (UT)(upper - lower) / incr + 1;
   } else {
+    KMP_DEBUG_ASSERT(incr != 0);
     trip_count = (UT)(lower - upper) / (-incr) + 1;
   }
   if (chunk < 1)
@@ -817,8 +832,10 @@ static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
   *p_st = span * nteams;
   *p_lb = lower + (span * team_id);
   *p_ub = *p_lb + span - incr;
-  if (p_last != NULL)
+  if (p_last != NULL) {
+    KMP_DEBUG_ASSERT(chunk != 0);
     *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
+  }
   // Correct upper bound if needed
   if (incr > 0) {
     if (*p_ub < *p_lb) // overflow?
-- 
cgit v1.2.3


From 3303be63fc2ac196568b03f58c146655e19183f6 Mon Sep 17 00:00:00 2001
From: Jonathan Peyton <jonathan.l.peyton@intel.com>
Date: Tue, 12 Mar 2024 11:36:43 -0700
Subject: [OpenMP] Make sure mask is set to nullptr (#83299)

---
 openmp/runtime/src/kmp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 48d7124e56c5..de758d37269d 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -825,7 +825,7 @@ class kmp_affinity_raii_t {
 
 public:
   kmp_affinity_raii_t(const kmp_affin_mask_t *new_mask = nullptr)
-      : restored(false) {
+      : mask(nullptr), restored(false) {
     if (KMP_AFFINITY_CAPABLE()) {
       KMP_CPU_ALLOC(mask);
       KMP_ASSERT(mask != NULL);
@@ -835,7 +835,7 @@ public:
     }
   }
   void restore() {
-    if (!restored && KMP_AFFINITY_CAPABLE()) {
+    if (mask && KMP_AFFINITY_CAPABLE() && !restored) {
       __kmp_set_system_affinity(mask, /*abort_on_error=*/true);
       KMP_CPU_FREE(mask);
     }
-- 
cgit v1.2.3


From 6272500e0b1456a87256f0c8659fdc86cfe3ef9a Mon Sep 17 00:00:00 2001
From: Jonathan Peyton <jonathan.l.peyton@intel.com>
Date: Tue, 12 Mar 2024 11:37:01 -0700
Subject: [OpenMP] Remove unused logical/physical CPUID information (#83298)

---
 openmp/runtime/src/kmp.h           |  2 --
 openmp/runtime/src/kmp_utility.cpp | 68 +-------------------------------------
 2 files changed, 1 insertion(+), 69 deletions(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index de758d37269d..569a1ab9b477 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1392,8 +1392,6 @@ typedef struct kmp_cpuinfo {
   int stepping; // CPUID(1).EAX[3:0] ( Stepping )
   kmp_cpuinfo_flags_t flags;
   int apic_id;
-  int physical_id;
-  int logical_id;
   kmp_uint64 frequency; // Nominal CPU frequency in Hz.
   char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)
 } kmp_cpuinfo_t;
diff --git a/openmp/runtime/src/kmp_utility.cpp b/openmp/runtime/src/kmp_utility.cpp
index f901eaca92f4..bfa450c9ced2 100644
--- a/openmp/runtime/src/kmp_utility.cpp
+++ b/openmp/runtime/src/kmp_utility.cpp
@@ -28,68 +28,6 @@ static const char *unknown = "unknown";
 static int trace_level = 5;
 #endif
 
-/* LOG_ID_BITS  = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
- * APIC_ID      = (PHY_ID << LOG_ID_BITS) | LOG_ID
- * PHY_ID       = APIC_ID >> LOG_ID_BITS
- */
-int __kmp_get_physical_id(int log_per_phy, int apic_id) {
-  int index_lsb, index_msb, temp;
-
-  if (log_per_phy > 1) {
-    index_lsb = 0;
-    index_msb = 31;
-
-    temp = log_per_phy;
-    while ((temp & 1) == 0) {
-      temp >>= 1;
-      index_lsb++;
-    }
-
-    temp = log_per_phy;
-    while ((temp & 0x80000000) == 0) {
-      temp <<= 1;
-      index_msb--;
-    }
-
-    /* If >1 bits were set in log_per_phy, choose next higher power of 2 */
-    if (index_lsb != index_msb)
-      index_msb++;
-
-    return ((int)(apic_id >> index_msb));
-  }
-
-  return apic_id;
-}
-
-/*
- * LOG_ID_BITS  = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
- * APIC_ID      = (PHY_ID << LOG_ID_BITS) | LOG_ID
- * LOG_ID       = APIC_ID & (( 1 << LOG_ID_BITS ) - 1 )
- */
-int __kmp_get_logical_id(int log_per_phy, int apic_id) {
-  unsigned current_bit;
-  int bits_seen;
-
-  if (log_per_phy <= 1)
-    return (0);
-
-  bits_seen = 0;
-
-  for (current_bit = 1; log_per_phy != 0; current_bit <<= 1) {
-    if (log_per_phy & current_bit) {
-      log_per_phy &= ~current_bit;
-      bits_seen++;
-    }
-  }
-
-  /* If exactly 1 bit was set in log_per_phy, choose next lower power of 2 */
-  if (bits_seen == 1) {
-    current_bit >>= 1;
-  }
-
-  return ((int)((current_bit - 1) & apic_id));
-}
-
 static kmp_uint64 __kmp_parse_frequency( // R: Frequency in Hz.
     char const *frequency // I: Float number and unit: MHz, GHz, or TGz.
 ) {
@@ -122,7 +60,6 @@ static kmp_uint64 __kmp_parse_frequency( // R: Frequency in Hz.
 void __kmp_query_cpuid(kmp_cpuinfo_t *p) {
   struct kmp_cpuid buf;
   int max_arg;
-  int log_per_phy;
 #ifdef KMP_DEBUG
   int cflush_size;
 #endif
@@ -227,11 +164,8 @@ void __kmp_query_cpuid(kmp_cpuinfo_t *p) {
 
     if ((buf.edx >> 28) & 1) {
       /* Bits 23-16: Logical Processors per Physical Processor (1 for P4) */
-      log_per_phy = data[2];
       p->apic_id = data[3]; /* Bits 31-24: Processor Initial APIC ID (X) */
-      KA_TRACE(trace_level, (" HT(%d TPUs)", log_per_phy));
-      p->physical_id = __kmp_get_physical_id(log_per_phy, p->apic_id);
-      p->logical_id = __kmp_get_logical_id(log_per_phy, p->apic_id);
+      KA_TRACE(trace_level, (" HT(%d TPUs)", data[2]));
     }
 #ifdef KMP_DEBUG
     if ((buf.edx >> 29) & 1) {
-- 
cgit v1.2.3


From 7c83d1bd612783634aae33baf8765ecfdcc5cd0d Mon Sep 17 00:00:00 2001
From: Han-Chung Wang <hanhan0912@gmail.com>
Date: Tue, 12 Mar 2024 11:46:05 -0700
Subject: [mlir][vector] Use inferRankReducedResultType for subview type
 inference. (#84395)

Fixes https://github.com/openxla/iree/issues/16475
---
 .../Dialect/Vector/Transforms/VectorTransforms.cpp | 48 ++++------------------
 .../vector-transfer-collapse-inner-most-dims.mlir  | 35 +++++++++-------
 2 files changed, 28 insertions(+), 55 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index a2d4e2166331..6f6b6dcdad20 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1255,42 +1255,6 @@ getTransferFoldableInnerUnitDims(MemRefType srcType, VectorType vectorType) {
   return result;
 }
 
-/// Returns a MemRef type that drops inner `dimsToDrop` dimensions from
-/// `srcType`. E.g., if `srcType` is memref<512x16x1x1xf32> and `dimsToDrop` is
-/// two, it returns memref<512x16x16> type.
-static MemRefType getMemRefTypeWithDroppingInnerDims(OpBuilder &builder,
-                                                     MemRefType srcType,
-                                                     size_t dimsToDrop) {
-  MemRefLayoutAttrInterface layout = srcType.getLayout();
-  if (isa<AffineMapAttr>(layout) && layout.isIdentity()) {
-    return MemRefType::get(srcType.getShape().drop_back(dimsToDrop),
-                           srcType.getElementType(), nullptr,
-                           srcType.getMemorySpace());
-  }
-  MemRefLayoutAttrInterface updatedLayout;
-  if (auto strided = dyn_cast<StridedLayoutAttr>(layout)) {
-    auto strides = llvm::to_vector(strided.getStrides().drop_back(dimsToDrop));
-    updatedLayout = StridedLayoutAttr::get(strided.getContext(),
-                                           strided.getOffset(), strides);
-    return MemRefType::get(srcType.getShape().drop_back(dimsToDrop),
-                           srcType.getElementType(), updatedLayout,
-                           srcType.getMemorySpace());
-  }
-
-  // Non-strided layout case.
-  AffineMap map = srcType.getLayout().getAffineMap();
-  int numSymbols = map.getNumSymbols();
-  for (size_t i = 0; i < dimsToDrop; ++i) {
-    int dim = srcType.getRank() - i - 1;
-    map = map.replace(builder.getAffineDimExpr(dim),
-                      builder.getAffineConstantExpr(0), map.getNumDims() - 1,
-                      numSymbols);
-  }
-  return MemRefType::get(srcType.getShape().drop_back(dimsToDrop),
-                         srcType.getElementType(), updatedLayout,
-                         srcType.getMemorySpace());
-}
-
 /// Drop inner most contiguous unit dimensions from transfer_read operand.
 class DropInnerMostUnitDimsTransferRead
     : public OpRewritePattern<vector::TransferReadOp> {
@@ -1337,8 +1301,10 @@ class DropInnerMostUnitDimsTransferRead
                                       rewriter.getIndexAttr(0));
     SmallVector<OpFoldResult> strides(srcType.getRank(),
                                       rewriter.getIndexAttr(1));
-    MemRefType resultMemrefType =
-        getMemRefTypeWithDroppingInnerDims(rewriter, srcType, dimsToDrop);
+    auto resultMemrefType =
+        cast<MemRefType>(memref::SubViewOp::inferRankReducedResultType(
+            srcType.getShape().drop_back(dimsToDrop), srcType, offsets, sizes,
+            strides));
     ArrayAttr inBoundsAttr =
         readOp.getInBounds()
             ? rewriter.getArrayAttr(
@@ -1421,8 +1387,10 @@ class DropInnerMostUnitDimsTransferWrite
                                       rewriter.getIndexAttr(0));
     SmallVector<OpFoldResult> strides(srcType.getRank(),
                                       rewriter.getIndexAttr(1));
-    MemRefType resultMemrefType =
-        getMemRefTypeWithDroppingInnerDims(rewriter, srcType, dimsToDrop);
+    auto resultMemrefType =
+        cast<MemRefType>(memref::SubViewOp::inferRankReducedResultType(
+            srcType.getShape().drop_back(dimsToDrop), srcType, offsets, sizes,
+            strides));
     ArrayAttr inBoundsAttr =
         writeOp.getInBounds()
             ? rewriter.getArrayAttr(
diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index 3984f17f9e8c..477755b66c02 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -16,22 +16,27 @@ func.func @contiguous_inner_most_view(%in: memref<1x1x8x1xf32, strided<[3072, 8,
 
 // -----
 
-func.func @contiguous_outer_dyn_inner_most_view(%in: memref<?x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
+func.func @contiguous_outer_dyn_inner_most_view(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<?x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
-  return %0 : vector<1x8x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %memref[%a, %b, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<8x1xf32>
+  return %v : vector<8x1xf32>
 }
-//      CHECK: func @contiguous_outer_dyn_inner_most_view(
+// CHECK: func.func @contiguous_outer_dyn_inner_most_view(
+// CHECK-SAME:   %[[IDX0:[a-zA-Z0-9]+]]
+// CHECK-SAME:   %[[IDX1:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[SRC:[a-zA-Z0-9]+]]
-//  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
-//  CHECK-DAG:   %[[D0:.+]] = memref.dim %[[SRC]], %[[C0]]
-//      CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]][0, 0, 0, 0] [%[[D0]], 1, 8, 1] [1, 1, 1, 1]
-// CHECK-SAME:    memref<?x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>> to memref<?x1x8xf32, strided<[3072, 8, 1], offset: ?>>
-//      CHECK:   %[[VEC:.+]] = vector.transfer_read %[[SRC_0]]
-// CHECK-SAME:    memref<?x1x8xf32, strided<[3072, 8, 1], offset: ?>>, vector<1x8xf32>
-//      CHECK:   %[[RESULT:.+]] = vector.shape_cast %[[VEC]]
-//      CHECK:   return %[[RESULT]]
+// CHECK-DAG:    %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG:    %[[C1:.+]] = arith.constant 1 : index
+// CHECK-DAG:    %[[PAD:.+]] = arith.constant 0.000000e+00 : f32
+// CHECK:        %[[D0:.+]] = memref.dim %[[SRC]], %[[C0]]
+// CHECK:        %[[D1:.+]] = memref.dim %[[SRC]], %[[C1]]
+// CHECK:        %[[VIEW:.+]] = memref.subview %[[SRC]][0, 0, 0, 0] [%[[D0]], %[[D1]], 8, 1] [1, 1, 1, 1]
+// CHECK-SAME:     memref<?x?x8x1xf32> to memref<?x?x8xf32, strided<[?, 8, 1], offset: ?>>
+// CHECK:        %[[VEC:.+]] = vector.transfer_read %[[VIEW]]
+// CHECK-SAME:     memref<?x?x8xf32, strided<[?, 8, 1], offset: ?>>, vector<8xf32>
+// CHECK:        %[[RESULT:.+]] = vector.shape_cast %[[VEC]]
+// CHECK:        return %[[RESULT]]
 
 // -----
 
@@ -43,7 +48,7 @@ func.func @contiguous_inner_most_dim(%A: memref<16x1xf32>, %i:index, %j:index) -
 }
 //      CHECK: func @contiguous_inner_most_dim(%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<8x1xf32>
 //      CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
-// CHECK-SAME:     memref<16x1xf32> to memref<16xf32>
+// CHECK-SAME:     memref<16x1xf32> to memref<16xf32, strided<[1]>>
 //      CHECK:   %[[V:.+]] = vector.transfer_read %[[SRC_0]]
 //      CHECK:   %[[RESULT]] = vector.shape_cast %[[V]] : vector<8xf32> to vector<8x1xf32>
 //      CHECK:   return %[[RESULT]]
@@ -111,7 +116,7 @@ func.func @drop_two_inner_most_dim_for_transfer_write(%arg0: memref<1x512x16x1x1
 // CHECK-SAME:   %[[IDX:[a-zA-Z0-9]+]]
 // CHECK-DAG:    %[[C0:.+]] = arith.constant 0 : index
 // CHECK:        %[[SUBVIEW:.+]] = memref.subview %[[DEST]]
-// CHECK-SAME:     memref<1x512x16x1x1xf32> to memref<1x512x16xf32>
+// CHECK-SAME:     memref<1x512x16x1x1xf32> to memref<1x512x16xf32, strided<[8192, 16, 1]>>
 // CHECK:        %[[CAST:.+]] = vector.shape_cast %[[VEC]] : vector<1x16x16x1x1xf32> to vector<1x16x16xf32>
 // CHECK:        vector.transfer_write %[[CAST]], %[[SUBVIEW]]
 // CHECK-SAME:     [%[[C0]], %[[IDX]], %[[C0]]]
-- 
cgit v1.2.3


From 45219702e77f6f834ea2dfe2a68b140e59f7e0e2 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Tue, 12 Mar 2024 19:07:37 +0000
Subject: [test][X86] Precommit test for large data threshold and i1 global

---
 llvm/test/CodeGen/X86/code-model-elf.ll | 181 +++++++++++++++++++++-----------
 1 file changed, 121 insertions(+), 60 deletions(-)

diff --git a/llvm/test/CodeGen/X86/code-model-elf.ll b/llvm/test/CodeGen/X86/code-model-elf.ll
index 71a6a310906e..4e96d39d153f 100644
--- a/llvm/test/CodeGen/X86/code-model-elf.ll
+++ b/llvm/test/CodeGen/X86/code-model-elf.ll
@@ -54,6 +54,7 @@ target triple = "x86_64--linux"
 @extern_data = external global [10 x i32], align 16
 @thread_data = external thread_local global i32, align 4
 @unknown_size_data = dso_local global [0 x i32] zeroinitializer, align 16
+@bool = dso_local global i1 false
 @opaque = external dso_local global %t
 @forced_small_data = dso_local global [10 x i32] zeroinitializer, code_model "small", align 16
 @forced_large_data = dso_local global [10 x i32] zeroinitializer, code_model "large", align 16
@@ -746,6 +747,66 @@ define dso_local i32 @load_unknown_size_data() #0 {
   ret i32 %rv
 }
 
+define dso_local i1 @load_bool() #0 {
+; SMALL-STATIC-LABEL: load_bool:
+; SMALL-STATIC:       # %bb.0:
+; SMALL-STATIC-NEXT:    movzbl bool(%rip), %eax
+; SMALL-STATIC-NEXT:    retq
+;
+; MEDIUM-STATIC-LABEL: load_bool:
+; MEDIUM-STATIC:       # %bb.0:
+; MEDIUM-STATIC-NEXT:    movabsq $bool, %rax
+; MEDIUM-STATIC-NEXT:    movzbl (%rax), %eax
+; MEDIUM-STATIC-NEXT:    retq
+;
+; LARGE-STATIC-LABEL: load_bool:
+; LARGE-STATIC:       # %bb.0:
+; LARGE-STATIC-NEXT:    movabsq $bool, %rax
+; LARGE-STATIC-NEXT:    movzbl (%rax), %eax
+; LARGE-STATIC-NEXT:    retq
+;
+; SMALL-PIC-LABEL: load_bool:
+; SMALL-PIC:       # %bb.0:
+; SMALL-PIC-NEXT:    movzbl bool(%rip), %eax
+; SMALL-PIC-NEXT:    retq
+;
+; MEDIUM-SMALL-DATA-PIC-LABEL: load_bool:
+; MEDIUM-SMALL-DATA-PIC:       # %bb.0:
+; MEDIUM-SMALL-DATA-PIC-NEXT:    leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax
+; MEDIUM-SMALL-DATA-PIC-NEXT:    movabsq $bool@GOTOFF, %rcx
+; MEDIUM-SMALL-DATA-PIC-NEXT:    movzbl (%rax,%rcx), %eax
+; MEDIUM-SMALL-DATA-PIC-NEXT:    retq
+;
+; MEDIUM-PIC-LABEL: load_bool:
+; MEDIUM-PIC:       # %bb.0:
+; MEDIUM-PIC-NEXT:    leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax
+; MEDIUM-PIC-NEXT:    movabsq $bool@GOTOFF, %rcx
+; MEDIUM-PIC-NEXT:    movzbl (%rax,%rcx), %eax
+; MEDIUM-PIC-NEXT:    retq
+;
+; LARGE-PIC-LABEL: load_bool:
+; LARGE-PIC:       # %bb.0:
+; LARGE-PIC-NEXT:  .L12$pb:
+; LARGE-PIC-NEXT:    leaq .L12$pb(%rip), %rax
+; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L12$pb, %rcx
+; LARGE-PIC-NEXT:    addq %rax, %rcx
+; LARGE-PIC-NEXT:    movabsq $bool@GOTOFF, %rax
+; LARGE-PIC-NEXT:    movzbl (%rcx,%rax), %eax
+; LARGE-PIC-NEXT:    retq
+;
+; LARGE-SMALL-DATA-PIC-LABEL: load_bool:
+; LARGE-SMALL-DATA-PIC:       # %bb.0:
+; LARGE-SMALL-DATA-PIC-NEXT:  .L12$pb:
+; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L12$pb(%rip), %rax
+; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L12$pb, %rcx
+; LARGE-SMALL-DATA-PIC-NEXT:    addq %rax, %rcx
+; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $bool@GOTOFF, %rax
+; LARGE-SMALL-DATA-PIC-NEXT:    movzbl (%rcx,%rax), %eax
+; LARGE-SMALL-DATA-PIC-NEXT:    retq
+  %rv = load i1, ptr @bool
+  ret i1 %rv
+}
+
 define dso_local ptr @lea_opaque() #0 {
 ; SMALL-STATIC-LABEL: lea_opaque:
 ; SMALL-STATIC:       # %bb.0:
@@ -783,9 +844,9 @@ define dso_local ptr @lea_opaque() #0 {
 ;
 ; LARGE-PIC-LABEL: lea_opaque:
 ; LARGE-PIC:       # %bb.0:
-; LARGE-PIC-NEXT:  .L12$pb:
-; LARGE-PIC-NEXT:    leaq .L12$pb(%rip), %rax
-; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L12$pb, %rcx
+; LARGE-PIC-NEXT:  .L13$pb:
+; LARGE-PIC-NEXT:    leaq .L13$pb(%rip), %rax
+; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L13$pb, %rcx
 ; LARGE-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-PIC-NEXT:    movabsq $opaque@GOTOFF, %rax
 ; LARGE-PIC-NEXT:    addq %rcx, %rax
@@ -793,9 +854,9 @@ define dso_local ptr @lea_opaque() #0 {
 ;
 ; LARGE-SMALL-DATA-PIC-LABEL: lea_opaque:
 ; LARGE-SMALL-DATA-PIC:       # %bb.0:
-; LARGE-SMALL-DATA-PIC-NEXT:  .L12$pb:
-; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L12$pb(%rip), %rax
-; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L12$pb, %rcx
+; LARGE-SMALL-DATA-PIC-NEXT:  .L13$pb:
+; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L13$pb(%rip), %rax
+; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L13$pb, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $opaque@GOTOFF, %rax
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rcx, %rax
@@ -840,9 +901,9 @@ define dso_local ptr @lea_ehdr_start() #0 {
 ;
 ; LARGE-PIC-LABEL: lea_ehdr_start:
 ; LARGE-PIC:       # %bb.0:
-; LARGE-PIC-NEXT:  .L13$pb:
-; LARGE-PIC-NEXT:    leaq .L13$pb(%rip), %rax
-; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L13$pb, %rcx
+; LARGE-PIC-NEXT:  .L14$pb:
+; LARGE-PIC-NEXT:    leaq .L14$pb(%rip), %rax
+; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L14$pb, %rcx
 ; LARGE-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-PIC-NEXT:    movabsq $__ehdr_start@GOTOFF, %rax
 ; LARGE-PIC-NEXT:    addq %rcx, %rax
@@ -850,9 +911,9 @@ define dso_local ptr @lea_ehdr_start() #0 {
 ;
 ; LARGE-SMALL-DATA-PIC-LABEL: lea_ehdr_start:
 ; LARGE-SMALL-DATA-PIC:       # %bb.0:
-; LARGE-SMALL-DATA-PIC-NEXT:  .L13$pb:
-; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L13$pb(%rip), %rax
-; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L13$pb, %rcx
+; LARGE-SMALL-DATA-PIC-NEXT:  .L14$pb:
+; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L14$pb(%rip), %rax
+; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L14$pb, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $__ehdr_start@GOTOFF, %rax
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rcx, %rax
@@ -897,9 +958,9 @@ define dso_local ptr @lea_start_foo() #0 {
 ;
 ; LARGE-PIC-LABEL: lea_start_foo:
 ; LARGE-PIC:       # %bb.0:
-; LARGE-PIC-NEXT:  .L14$pb:
-; LARGE-PIC-NEXT:    leaq .L14$pb(%rip), %rax
-; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L14$pb, %rcx
+; LARGE-PIC-NEXT:  .L15$pb:
+; LARGE-PIC-NEXT:    leaq .L15$pb(%rip), %rax
+; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L15$pb, %rcx
 ; LARGE-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-PIC-NEXT:    movabsq $__start_foo@GOTOFF, %rax
 ; LARGE-PIC-NEXT:    addq %rcx, %rax
@@ -907,9 +968,9 @@ define dso_local ptr @lea_start_foo() #0 {
 ;
 ; LARGE-SMALL-DATA-PIC-LABEL: lea_start_foo:
 ; LARGE-SMALL-DATA-PIC:       # %bb.0:
-; LARGE-SMALL-DATA-PIC-NEXT:  .L14$pb:
-; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L14$pb(%rip), %rax
-; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L14$pb, %rcx
+; LARGE-SMALL-DATA-PIC-NEXT:  .L15$pb:
+; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L15$pb(%rip), %rax
+; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L15$pb, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $__start_foo@GOTOFF, %rax
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rcx, %rax
@@ -954,9 +1015,9 @@ define dso_local ptr @lea_stop_foo() #0 {
 ;
 ; LARGE-PIC-LABEL: lea_stop_foo:
 ; LARGE-PIC:       # %bb.0:
-; LARGE-PIC-NEXT:  .L15$pb:
-; LARGE-PIC-NEXT:    leaq .L15$pb(%rip), %rax
-; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L15$pb, %rcx
+; LARGE-PIC-NEXT:  .L16$pb:
+; LARGE-PIC-NEXT:    leaq .L16$pb(%rip), %rax
+; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L16$pb, %rcx
 ; LARGE-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-PIC-NEXT:    movabsq $__stop_foo@GOTOFF, %rax
 ; LARGE-PIC-NEXT:    addq %rcx, %rax
@@ -964,9 +1025,9 @@ define dso_local ptr @lea_stop_foo() #0 {
 ;
 ; LARGE-SMALL-DATA-PIC-LABEL: lea_stop_foo:
 ; LARGE-SMALL-DATA-PIC:       # %bb.0:
-; LARGE-SMALL-DATA-PIC-NEXT:  .L15$pb:
-; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L15$pb(%rip), %rax
-; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L15$pb, %rcx
+; LARGE-SMALL-DATA-PIC-NEXT:  .L16$pb:
+; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L16$pb(%rip), %rax
+; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L16$pb, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $__stop_foo@GOTOFF, %rax
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rcx, %rax
@@ -1035,9 +1096,9 @@ define dso_local ptr @lea_static_fn() #0 {
 ;
 ; LARGE-PIC-LABEL: lea_static_fn:
 ; LARGE-PIC:       # %bb.0:
-; LARGE-PIC-NEXT:  .L19$pb:
-; LARGE-PIC-NEXT:    leaq .L19$pb(%rip), %rax
-; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L19$pb, %rcx
+; LARGE-PIC-NEXT:  .L20$pb:
+; LARGE-PIC-NEXT:    leaq .L20$pb(%rip), %rax
+; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L20$pb, %rcx
 ; LARGE-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-PIC-NEXT:    movabsq $static_fn@GOTOFF, %rax
 ; LARGE-PIC-NEXT:    addq %rcx, %rax
@@ -1045,9 +1106,9 @@ define dso_local ptr @lea_static_fn() #0 {
 ;
 ; LARGE-SMALL-DATA-PIC-LABEL: lea_static_fn:
 ; LARGE-SMALL-DATA-PIC:       # %bb.0:
-; LARGE-SMALL-DATA-PIC-NEXT:  .L19$pb:
-; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L19$pb(%rip), %rax
-; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L19$pb, %rcx
+; LARGE-SMALL-DATA-PIC-NEXT:  .L20$pb:
+; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L20$pb(%rip), %rax
+; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L20$pb, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $static_fn@GOTOFF, %rax
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rcx, %rax
@@ -1088,9 +1149,9 @@ define dso_local ptr @lea_global_fn() #0 {
 ;
 ; LARGE-PIC-LABEL: lea_global_fn:
 ; LARGE-PIC:       # %bb.0:
-; LARGE-PIC-NEXT:  .L20$pb:
-; LARGE-PIC-NEXT:    leaq .L20$pb(%rip), %rax
-; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L20$pb, %rcx
+; LARGE-PIC-NEXT:  .L21$pb:
+; LARGE-PIC-NEXT:    leaq .L21$pb(%rip), %rax
+; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L21$pb, %rcx
 ; LARGE-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-PIC-NEXT:    movabsq $global_fn@GOTOFF, %rax
 ; LARGE-PIC-NEXT:    addq %rcx, %rax
@@ -1098,9 +1159,9 @@ define dso_local ptr @lea_global_fn() #0 {
 ;
 ; LARGE-SMALL-DATA-PIC-LABEL: lea_global_fn:
 ; LARGE-SMALL-DATA-PIC:       # %bb.0:
-; LARGE-SMALL-DATA-PIC-NEXT:  .L20$pb:
-; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L20$pb(%rip), %rax
-; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L20$pb, %rcx
+; LARGE-SMALL-DATA-PIC-NEXT:  .L21$pb:
+; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L21$pb(%rip), %rax
+; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L21$pb, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $global_fn@GOTOFF, %rax
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rcx, %rax
@@ -1141,9 +1202,9 @@ define dso_local ptr @lea_extern_fn() #0 {
 ;
 ; LARGE-PIC-LABEL: lea_extern_fn:
 ; LARGE-PIC:       # %bb.0:
-; LARGE-PIC-NEXT:  .L21$pb:
-; LARGE-PIC-NEXT:    leaq .L21$pb(%rip), %rax
-; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L21$pb, %rcx
+; LARGE-PIC-NEXT:  .L22$pb:
+; LARGE-PIC-NEXT:    leaq .L22$pb(%rip), %rax
+; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L22$pb, %rcx
 ; LARGE-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-PIC-NEXT:    movabsq $extern_fn@GOT, %rax
 ; LARGE-PIC-NEXT:    movq (%rcx,%rax), %rax
@@ -1151,9 +1212,9 @@ define dso_local ptr @lea_extern_fn() #0 {
 ;
 ; LARGE-SMALL-DATA-PIC-LABEL: lea_extern_fn:
 ; LARGE-SMALL-DATA-PIC:       # %bb.0:
-; LARGE-SMALL-DATA-PIC-NEXT:  .L21$pb:
-; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L21$pb(%rip), %rax
-; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L21$pb, %rcx
+; LARGE-SMALL-DATA-PIC-NEXT:  .L22$pb:
+; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L22$pb(%rip), %rax
+; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L22$pb, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $extern_fn@GOT, %rax
 ; LARGE-SMALL-DATA-PIC-NEXT:    movq (%rcx,%rax), %rax
@@ -1194,9 +1255,9 @@ define dso_local ptr @lea_ifunc() #0 {
 ;
 ; LARGE-PIC-LABEL: lea_ifunc:
 ; LARGE-PIC:       # %bb.0:
-; LARGE-PIC-NEXT:  .L22$pb:
-; LARGE-PIC-NEXT:    leaq .L22$pb(%rip), %rax
-; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L22$pb, %rcx
+; LARGE-PIC-NEXT:  .L23$pb:
+; LARGE-PIC-NEXT:    leaq .L23$pb(%rip), %rax
+; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L23$pb, %rcx
 ; LARGE-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-PIC-NEXT:    movabsq $ifunc_func@GOT, %rax
 ; LARGE-PIC-NEXT:    movq (%rcx,%rax), %rax
@@ -1204,9 +1265,9 @@ define dso_local ptr @lea_ifunc() #0 {
 ;
 ; LARGE-SMALL-DATA-PIC-LABEL: lea_ifunc:
 ; LARGE-SMALL-DATA-PIC:       # %bb.0:
-; LARGE-SMALL-DATA-PIC-NEXT:  .L22$pb:
-; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L22$pb(%rip), %rax
-; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L22$pb, %rcx
+; LARGE-SMALL-DATA-PIC-NEXT:  .L23$pb:
+; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L23$pb(%rip), %rax
+; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L23$pb, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $ifunc_func@GOT, %rax
 ; LARGE-SMALL-DATA-PIC-NEXT:    movq (%rcx,%rax), %rax
@@ -1247,9 +1308,9 @@ define dso_local ptr @lea_dso_local_ifunc() #0 {
 ;
 ; LARGE-PIC-LABEL: lea_dso_local_ifunc:
 ; LARGE-PIC:       # %bb.0:
-; LARGE-PIC-NEXT:  .L23$pb:
-; LARGE-PIC-NEXT:    leaq .L23$pb(%rip), %rax
-; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L23$pb, %rcx
+; LARGE-PIC-NEXT:  .L24$pb:
+; LARGE-PIC-NEXT:    leaq .L24$pb(%rip), %rax
+; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L24$pb, %rcx
 ; LARGE-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-PIC-NEXT:    movabsq $dso_local_ifunc_func@GOTOFF, %rax
 ; LARGE-PIC-NEXT:    addq %rcx, %rax
@@ -1257,9 +1318,9 @@ define dso_local ptr @lea_dso_local_ifunc() #0 {
 ;
 ; LARGE-SMALL-DATA-PIC-LABEL: lea_dso_local_ifunc:
 ; LARGE-SMALL-DATA-PIC:       # %bb.0:
-; LARGE-SMALL-DATA-PIC-NEXT:  .L23$pb:
-; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L23$pb(%rip), %rax
-; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L23$pb, %rcx
+; LARGE-SMALL-DATA-PIC-NEXT:  .L24$pb:
+; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L24$pb(%rip), %rax
+; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L24$pb, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $dso_local_ifunc_func@GOTOFF, %rax
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rcx, %rax
@@ -1334,9 +1395,9 @@ define dso_local float @load_constant_pool(float %x) #0 {
 ;
 ; LARGE-PIC-LABEL: load_constant_pool:
 ; LARGE-PIC:       # %bb.0:
-; LARGE-PIC-NEXT:  .L25$pb:
-; LARGE-PIC-NEXT:    leaq .L25$pb(%rip), %rax
-; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L25$pb, %rcx
+; LARGE-PIC-NEXT:  .L26$pb:
+; LARGE-PIC-NEXT:    leaq .L26$pb(%rip), %rax
+; LARGE-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L26$pb, %rcx
 ; LARGE-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-PIC-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}@GOTOFF, %rax
 ; LARGE-PIC-NEXT:    addss (%rcx,%rax), %xmm0
@@ -1344,9 +1405,9 @@ define dso_local float @load_constant_pool(float %x) #0 {
 ;
 ; LARGE-SMALL-DATA-PIC-LABEL: load_constant_pool:
 ; LARGE-SMALL-DATA-PIC:       # %bb.0:
-; LARGE-SMALL-DATA-PIC-NEXT:  .L25$pb:
-; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L25$pb(%rip), %rax
-; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L25$pb, %rcx
+; LARGE-SMALL-DATA-PIC-NEXT:  .L26$pb:
+; LARGE-SMALL-DATA-PIC-NEXT:    leaq .L26$pb(%rip), %rax
+; LARGE-SMALL-DATA-PIC-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L26$pb, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    addq %rax, %rcx
 ; LARGE-SMALL-DATA-PIC-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}@GOTOFF, %rax
 ; LARGE-SMALL-DATA-PIC-NEXT:    addss (%rcx,%rax), %xmm0
-- 
cgit v1.2.3


From a38b7a432d3cbb093af9310eba5b4982dc0a0243 Mon Sep 17 00:00:00 2001
From: Cyndy Ishida <cyndy_ishida@apple.com>
Date: Tue, 12 Mar 2024 12:37:17 -0700
Subject: [InstallAPI] Break up headers and add common header for TextAPI types
 (#84960)

Before it gets too unwieldy, add a common header for all MachO types
that are used across InstallAPI. Also, break up the types in
`InstallAPI/Frontend`. This both avoids circular dependencies and is
logically easier to maintain as more functionality gets added.
---
 clang/include/clang/InstallAPI/Context.h         |   6 +-
 clang/include/clang/InstallAPI/Frontend.h        |  94 --------------------
 clang/include/clang/InstallAPI/FrontendRecords.h | 108 +++++++++++++++++++++++
 clang/include/clang/InstallAPI/MachO.h           |  40 +++++++++
 clang/lib/InstallAPI/Frontend.cpp                |   1 +
 clang/lib/InstallAPI/Visitor.cpp                 |   2 +-
 clang/tools/clang-installapi/ClangInstallAPI.cpp |   6 +-
 clang/tools/clang-installapi/Options.h           |  12 +--
 8 files changed, 159 insertions(+), 110 deletions(-)
 create mode 100644 clang/include/clang/InstallAPI/FrontendRecords.h
 create mode 100644 clang/include/clang/InstallAPI/MachO.h

diff --git a/clang/include/clang/InstallAPI/Context.h b/clang/include/clang/InstallAPI/Context.h
index 4e9e90e5d2db..bdb576d7d85f 100644
--- a/clang/include/clang/InstallAPI/Context.h
+++ b/clang/include/clang/InstallAPI/Context.h
@@ -12,8 +12,8 @@
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/InstallAPI/HeaderFile.h"
+#include "clang/InstallAPI/MachO.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/TextAPI/InterfaceFile.h"
 
 namespace clang {
 namespace installapi {
@@ -25,7 +25,7 @@ class FrontendRecordsSlice;
 struct InstallAPIContext {
 
   /// Library attributes that are typically passed as linker inputs.
-  llvm::MachO::RecordsSlice::BinaryAttrs BA;
+  BinaryAttrs BA;
 
   /// All headers that represent a library.
   HeaderSeq InputHeaders;
@@ -49,7 +49,7 @@ struct InstallAPIContext {
   llvm::StringRef OutputLoc{};
 
   /// What encoding to write output as.
-  llvm::MachO::FileType FT = llvm::MachO::FileType::TBD_V5;
+  FileType FT = FileType::TBD_V5;
 
   /// Populate entries of headers that should be included for TextAPI
   /// generation.
diff --git a/clang/include/clang/InstallAPI/Frontend.h b/clang/include/clang/InstallAPI/Frontend.h
index cbc2b159ebd1..873cb50d60a5 100644
--- a/clang/include/clang/InstallAPI/Frontend.h
+++ b/clang/include/clang/InstallAPI/Frontend.h
@@ -25,100 +25,6 @@
 namespace clang {
 namespace installapi {
 
-using SymbolFlags = llvm::MachO::SymbolFlags;
-using RecordLinkage = llvm::MachO::RecordLinkage;
-using GlobalRecord = llvm::MachO::GlobalRecord;
-using ObjCContainerRecord = llvm::MachO::ObjCContainerRecord;
-using ObjCInterfaceRecord = llvm::MachO::ObjCInterfaceRecord;
-using ObjCCategoryRecord = llvm::MachO::ObjCCategoryRecord;
-using ObjCIVarRecord = llvm::MachO::ObjCIVarRecord;
-
-// Represents a collection of frontend records for a library that are tied to a
-// darwin target triple.
-class FrontendRecordsSlice : public llvm::MachO::RecordsSlice {
-public:
-  FrontendRecordsSlice(const llvm::Triple &T)
-      : llvm::MachO::RecordsSlice({T}) {}
-
-  /// Add non-ObjC global record with attributes from AST.
-  ///
-  /// \param Name The name of symbol.
-  /// \param Linkage The linkage of symbol.
-  /// \param GV The kind of global.
-  /// \param Avail The availability information tied to the active target
-  /// triple.
-  /// \param D The pointer to the declaration from traversing AST.
-  /// \param Access The intended access level of symbol.
-  /// \param Flags The flags that describe attributes of the symbol.
-  /// \param Inlined Whether declaration is inlined, only applicable to
-  /// functions.
-  /// \return The non-owning pointer to added record in slice.
-  GlobalRecord *addGlobal(StringRef Name, RecordLinkage Linkage,
-                          GlobalRecord::Kind GV,
-                          const clang::AvailabilityInfo Avail, const Decl *D,
-                          const HeaderType Access,
-                          SymbolFlags Flags = SymbolFlags::None,
-                          bool Inlined = false);
-
-  /// Add ObjC Class record with attributes from AST.
-  ///
-  /// \param Name The name of class, not symbol.
-  /// \param Linkage The linkage of symbol.
-  /// \param Avail The availability information tied to the active target
-  /// triple.
-  /// \param D The pointer to the declaration from traversing AST.
-  /// \param Access The intended access level of symbol.
-  /// \param IsEHType Whether declaration has an exception attribute.
-  /// \return The non-owning pointer to added record in slice.
-  ObjCInterfaceRecord *addObjCInterface(StringRef Name, RecordLinkage Linkage,
-                                        const clang::AvailabilityInfo Avail,
-                                        const Decl *D, HeaderType Access,
-                                        bool IsEHType);
-
-  /// Add ObjC Category record with attributes from AST.
-  ///
-  /// \param ClassToExtend The name of class that is extended by category, not
-  /// symbol.
-  /// \param CategoryName The name of category, not symbol.
-  /// \param Avail The availability information tied
-  /// to the active target triple.
-  /// \param D The pointer to the declaration from traversing AST.
-  /// \param Access The intended access level of symbol.
-  /// \return The non-owning pointer to added record in slice.
-  ObjCCategoryRecord *addObjCCategory(StringRef ClassToExtend,
-                                      StringRef CategoryName,
-                                      const clang::AvailabilityInfo Avail,
-                                      const Decl *D, HeaderType Access);
-
-  /// Add ObjC IVar record with attributes from AST.
-  ///
-  /// \param Container The owning pointer for instance variable.
-  /// \param Name The name of ivar, not symbol.
-  /// \param Linkage The linkage of symbol.
-  /// \param Avail The availability information tied to the active target
-  /// triple.
-  /// \param D The pointer to the declaration from traversing AST.
-  /// \param Access The intended access level of symbol.
-  /// \param AC The access control tied to the ivar declaration.
-  /// \return The non-owning pointer to added record in slice.
-  ObjCIVarRecord *addObjCIVar(ObjCContainerRecord *Container,
-                              StringRef IvarName, RecordLinkage Linkage,
-                              const clang::AvailabilityInfo Avail,
-                              const Decl *D, HeaderType Access,
-                              const clang::ObjCIvarDecl::AccessControl AC);
-
-private:
-  /// Frontend information captured about records.
-  struct FrontendAttrs {
-    const AvailabilityInfo Avail;
-    const Decl *D;
-    const HeaderType Access;
-  };
-
-  /// Mapping of records stored in slice to their frontend attributes.
-  llvm::DenseMap<llvm::MachO::Record *, FrontendAttrs> FrontendRecords;
-};
-
 /// Create a buffer that contains all headers to scan
 /// for global symbols with.
 std::unique_ptr<llvm::MemoryBuffer> createInputBuffer(InstallAPIContext &Ctx);
diff --git a/clang/include/clang/InstallAPI/FrontendRecords.h b/clang/include/clang/InstallAPI/FrontendRecords.h
new file mode 100644
index 000000000000..333015b6a113
--- /dev/null
+++ b/clang/include/clang/InstallAPI/FrontendRecords.h
@@ -0,0 +1,108 @@
+//===- InstallAPI/FrontendRecords.h ------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_INSTALLAPI_FRONTENDRECORDS_H
+#define LLVM_CLANG_INSTALLAPI_FRONTENDRECORDS_H
+
+#include "clang/AST/Availability.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/InstallAPI/MachO.h"
+
+namespace clang {
+namespace installapi {
+
+/// Frontend information captured about records.
+struct FrontendAttrs {
+  const AvailabilityInfo Avail;
+  const Decl *D;
+  const HeaderType Access;
+};
+
+// Represents a collection of frontend records for a library that are tied to a
+// darwin target triple.
+class FrontendRecordsSlice : public llvm::MachO::RecordsSlice {
+public:
+  FrontendRecordsSlice(const llvm::Triple &T)
+      : llvm::MachO::RecordsSlice({T}) {}
+
+  /// Add non-ObjC global record with attributes from AST.
+  ///
+  /// \param Name The name of symbol.
+  /// \param Linkage The linkage of symbol.
+  /// \param GV The kind of global.
+  /// \param Avail The availability information tied to the active target
+  /// triple.
+  /// \param D The pointer to the declaration from traversing AST.
+  /// \param Access The intended access level of symbol.
+  /// \param Flags The flags that describe attributes of the symbol.
+  /// \param Inlined Whether declaration is inlined, only applicable to
+  /// functions.
+  /// \return The non-owning pointer to added record in slice.
+  GlobalRecord *addGlobal(StringRef Name, RecordLinkage Linkage,
+                          GlobalRecord::Kind GV,
+                          const clang::AvailabilityInfo Avail, const Decl *D,
+                          const HeaderType Access,
+                          SymbolFlags Flags = SymbolFlags::None,
+                          bool Inlined = false);
+
+  /// Add ObjC Class record with attributes from AST.
+  ///
+  /// \param Name The name of class, not symbol.
+  /// \param Linkage The linkage of symbol.
+  /// \param Avail The availability information tied to the active target
+  /// triple.
+  /// \param D The pointer to the declaration from traversing AST.
+  /// \param Access The intended access level of symbol.
+  /// \param IsEHType Whether declaration has an exception attribute.
+  /// \return The non-owning pointer to added record in slice.
+  ObjCInterfaceRecord *addObjCInterface(StringRef Name, RecordLinkage Linkage,
+                                        const clang::AvailabilityInfo Avail,
+                                        const Decl *D, HeaderType Access,
+                                        bool IsEHType);
+
+  /// Add ObjC Category record with attributes from AST.
+  ///
+  /// \param ClassToExtend The name of class that is extended by category, not
+  /// symbol.
+  /// \param CategoryName The name of category, not symbol.
+  /// \param Avail The availability information tied
+  /// to the active target triple.
+  /// \param D The pointer to the declaration from traversing AST.
+  /// \param Access The intended access level of symbol.
+  /// \return The non-owning pointer to added record in slice.
+  ObjCCategoryRecord *addObjCCategory(StringRef ClassToExtend,
+                                      StringRef CategoryName,
+                                      const clang::AvailabilityInfo Avail,
+                                      const Decl *D, HeaderType Access);
+
+  /// Add ObjC IVar record with attributes from AST.
+  ///
+  /// \param Container The owning pointer for instance variable.
+  /// \param Name The name of ivar, not symbol.
+  /// \param Linkage The linkage of symbol.
+  /// \param Avail The availability information tied to the active target
+  /// triple.
+  /// \param D The pointer to the declaration from traversing AST.
+  /// \param Access The intended access level of symbol.
+  /// \param AC The access control tied to the ivar declaration.
+  /// \return The non-owning pointer to added record in slice.
+  ObjCIVarRecord *addObjCIVar(ObjCContainerRecord *Container,
+                              StringRef IvarName, RecordLinkage Linkage,
+                              const clang::AvailabilityInfo Avail,
+                              const Decl *D, HeaderType Access,
+                              const clang::ObjCIvarDecl::AccessControl AC);
+
+private:
+  /// Mapping of records stored in slice to their frontend attributes.
+  llvm::DenseMap<Record *, FrontendAttrs> FrontendRecords;
+};
+
+} // namespace installapi
+} // namespace clang
+
+#endif // LLVM_CLANG_INSTALLAPI_FRONTENDRECORDS_H
diff --git a/clang/include/clang/InstallAPI/MachO.h b/clang/include/clang/InstallAPI/MachO.h
new file mode 100644
index 000000000000..55e5591389ce
--- /dev/null
+++ b/clang/include/clang/InstallAPI/MachO.h
@@ -0,0 +1,40 @@
+//===- InstallAPI/MachO.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Imports and forward declarations for llvm::MachO types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_INSTALLAPI_MACHO_H
+#define LLVM_CLANG_INSTALLAPI_MACHO_H
+
+#include "llvm/TextAPI/Architecture.h"
+#include "llvm/TextAPI/InterfaceFile.h"
+#include "llvm/TextAPI/PackedVersion.h"
+#include "llvm/TextAPI/Platform.h"
+#include "llvm/TextAPI/RecordVisitor.h"
+#include "llvm/TextAPI/Target.h"
+#include "llvm/TextAPI/TextAPIWriter.h"
+#include "llvm/TextAPI/Utils.h"
+
+using SymbolFlags = llvm::MachO::SymbolFlags;
+using RecordLinkage = llvm::MachO::RecordLinkage;
+using Record = llvm::MachO::Record;
+using GlobalRecord = llvm::MachO::GlobalRecord;
+using ObjCContainerRecord = llvm::MachO::ObjCContainerRecord;
+using ObjCInterfaceRecord = llvm::MachO::ObjCInterfaceRecord;
+using ObjCCategoryRecord = llvm::MachO::ObjCCategoryRecord;
+using ObjCIVarRecord = llvm::MachO::ObjCIVarRecord;
+using Records = llvm::MachO::Records;
+using BinaryAttrs = llvm::MachO::RecordsSlice::BinaryAttrs;
+using SymbolSet = llvm::MachO::SymbolSet;
+using FileType = llvm::MachO::FileType;
+using PackedVersion = llvm::MachO::PackedVersion;
+using Target = llvm::MachO::Target;
+
+#endif // LLVM_CLANG_INSTALLAPI_MACHO_H
diff --git a/clang/lib/InstallAPI/Frontend.cpp b/clang/lib/InstallAPI/Frontend.cpp
index 0d526fe1da66..707aeb17dc89 100644
--- a/clang/lib/InstallAPI/Frontend.cpp
+++ b/clang/lib/InstallAPI/Frontend.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/InstallAPI/Frontend.h"
 #include "clang/AST/Availability.h"
+#include "clang/InstallAPI/FrontendRecords.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 
diff --git a/clang/lib/InstallAPI/Visitor.cpp b/clang/lib/InstallAPI/Visitor.cpp
index aded94f7a94a..b4ed5974a057 100644
--- a/clang/lib/InstallAPI/Visitor.cpp
+++ b/clang/lib/InstallAPI/Visitor.cpp
@@ -11,7 +11,7 @@
 #include "clang/AST/ParentMapContext.h"
 #include "clang/AST/VTableBuilder.h"
 #include "clang/Basic/Linkage.h"
-#include "clang/InstallAPI/Frontend.h"
+#include "clang/InstallAPI/FrontendRecords.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/DataLayout.h"
diff --git a/clang/tools/clang-installapi/ClangInstallAPI.cpp b/clang/tools/clang-installapi/ClangInstallAPI.cpp
index c6da1c80a673..15b0baee88bc 100644
--- a/clang/tools/clang-installapi/ClangInstallAPI.cpp
+++ b/clang/tools/clang-installapi/ClangInstallAPI.cpp
@@ -19,6 +19,8 @@
 #include "clang/Driver/Tool.h"
 #include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/InstallAPI/Frontend.h"
+#include "clang/InstallAPI/FrontendRecords.h"
+#include "clang/InstallAPI/MachO.h"
 #include "clang/Tooling/Tooling.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Option/Option.h"
@@ -29,8 +31,6 @@
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/TargetParser/Host.h"
-#include "llvm/TextAPI/RecordVisitor.h"
-#include "llvm/TextAPI/TextAPIWriter.h"
 #include <memory>
 
 using namespace clang;
@@ -125,7 +125,7 @@ static bool run(ArrayRef<const char *> Args, const char *ProgName) {
   // Execute and gather AST results.
   // An invocation is ran for each unique target triple and for each header
   // access level.
-  llvm::MachO::Records FrontendResults;
+  Records FrontendResults;
   for (const auto &[Targ, Trip] : Opts.DriverOpts.Targets) {
     for (const HeaderType Type :
          {HeaderType::Public, HeaderType::Private, HeaderType::Project}) {
diff --git a/clang/tools/clang-installapi/Options.h b/clang/tools/clang-installapi/Options.h
index 9d4d841284fd..06f79b62c531 100644
--- a/clang/tools/clang-installapi/Options.h
+++ b/clang/tools/clang-installapi/Options.h
@@ -13,23 +13,17 @@
 #include "clang/Basic/FileManager.h"
 #include "clang/Frontend/FrontendOptions.h"
 #include "clang/InstallAPI/Context.h"
+#include "clang/InstallAPI/MachO.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
 #include "llvm/Support/Program.h"
 #include "llvm/TargetParser/Triple.h"
-#include "llvm/TextAPI/Architecture.h"
-#include "llvm/TextAPI/InterfaceFile.h"
-#include "llvm/TextAPI/PackedVersion.h"
-#include "llvm/TextAPI/Platform.h"
-#include "llvm/TextAPI/Target.h"
-#include "llvm/TextAPI/Utils.h"
 #include <set>
 #include <string>
 #include <vector>
 
 namespace clang {
 namespace installapi {
-using Macro = std::pair<std::string, bool /*isUndef*/>;
 
 struct DriverOptions {
   /// \brief Path to input file lists (JSON).
@@ -42,7 +36,7 @@ struct DriverOptions {
   std::string OutputPath;
 
   /// \brief File encoding to print.
-  llvm::MachO::FileType OutFT = llvm::MachO::FileType::TBD_V5;
+  FileType OutFT = FileType::TBD_V5;
 
   /// \brief Print verbose output.
   bool Verbose = false;
@@ -53,7 +47,7 @@ struct LinkerOptions {
   std::string InstallName;
 
   /// \brief The current version to use for the dynamic library.
-  llvm::MachO::PackedVersion CurrentVersion;
+  PackedVersion CurrentVersion;
 
   /// \brief Is application extension safe.
   bool AppExtensionSafe = false;
-- 
cgit v1.2.3


From 6bbb73b4cbc89b7291a8088aaa635814a216fbf6 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Tue, 12 Mar 2024 13:43:29 -0600
Subject: [X86] Fix determining if globals with size <8 bits are large (#84975)

Previously any global under 8 bits would accidentally be considered 0
sized, which is considered a large global.
---
 llvm/lib/Target/TargetMachine.cpp       | 2 +-
 llvm/test/CodeGen/X86/code-model-elf.ll | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index 8b177a89c919..a7fe329b064e 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -92,7 +92,7 @@ bool TargetMachine::isLargeGlobalValue(const GlobalValue *GVal) const {
                                 GV->getName().starts_with("__stop_")))
       return true;
     const DataLayout &DL = GV->getParent()->getDataLayout();
-    uint64_t Size = DL.getTypeSizeInBits(GV->getValueType()) / 8;
+    uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
     return Size == 0 || Size > LargeDataThreshold;
   }
 
diff --git a/llvm/test/CodeGen/X86/code-model-elf.ll b/llvm/test/CodeGen/X86/code-model-elf.ll
index 4e96d39d153f..0da62e3e7a65 100644
--- a/llvm/test/CodeGen/X86/code-model-elf.ll
+++ b/llvm/test/CodeGen/X86/code-model-elf.ll
@@ -772,9 +772,7 @@ define dso_local i1 @load_bool() #0 {
 ;
 ; MEDIUM-SMALL-DATA-PIC-LABEL: load_bool:
 ; MEDIUM-SMALL-DATA-PIC:       # %bb.0:
-; MEDIUM-SMALL-DATA-PIC-NEXT:    leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax
-; MEDIUM-SMALL-DATA-PIC-NEXT:    movabsq $bool@GOTOFF, %rcx
-; MEDIUM-SMALL-DATA-PIC-NEXT:    movzbl (%rax,%rcx), %eax
+; MEDIUM-SMALL-DATA-PIC-NEXT:    movzbl bool(%rip), %eax
 ; MEDIUM-SMALL-DATA-PIC-NEXT:    retq
 ;
 ; MEDIUM-PIC-LABEL: load_bool:
-- 
cgit v1.2.3


From 6095f8bbc410d2f8b926a32ba969d507f7343949 Mon Sep 17 00:00:00 2001
From: Justin Lebar <justin.lebar@gmail.com>
Date: Tue, 12 Mar 2024 12:52:31 -0700
Subject: Get rid of noisy debug log in verifyOpAndAdjustFlags. (#84677)

This debug log adds noise to a large fraction of *other* debug logs when
you run with -debug, because it prints "Verifying operation: blah blah\n"
whenever those other debug logs dump an op.

You can use -debug-only to get around this, but sometimes -debug really
is what's called for!
---
 mlir/lib/IR/AsmPrinter.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index 8d75349f8eed..456cf6a2c277 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -1895,9 +1895,6 @@ static OpPrintingFlags verifyOpAndAdjustFlags(Operation *op,
       printerFlags.shouldAssumeVerified())
     return printerFlags;
 
-  LLVM_DEBUG(llvm::dbgs() << DEBUG_TYPE << ": Verifying operation: "
-                          << op->getName() << "\n");
-
   // Ignore errors emitted by the verifier. We check the thread id to avoid
   // consuming other threads' errors.
   auto parentThreadId = llvm::get_threadid();
-- 
cgit v1.2.3


From 97fb91ee665660036f8beffd064b44c6fbbf1b73 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@outlook.com>
Date: Tue, 12 Mar 2024 12:55:47 -0700
Subject: [SLP][NFC]Add a test with non-profitable alternate vectorized
 instructions.

---
 .../SLPVectorizer/alternate-non-profitable.ll      | 190 +++++++++++++++++++++
 1 file changed, 190 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
new file mode 100644
index 000000000000..c6e2cf5543e1
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
@@ -0,0 +1,190 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-10000 < %s | FileCheck %s
+
+define <2 x float> @test_fdiv(float %a, i1 %cmp) {
+; CHECK-LABEL: define <2 x float> @test_fdiv(
+; CHECK-SAME: float [[A:%.*]], i1 [[CMP:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float [[A]], 3.000000e+00
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[CMP]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP2]]
+; CHECK-NEXT:    ret <2 x float> [[TMP3]]
+;
+  %1 = fdiv float %a, 3.000000e+00
+  %2 = insertelement <2 x float> poison, float %1, i64 1
+  %3 = select i1 %cmp, <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> %2
+  ret <2 x float> %3
+}
+
+define <2 x float> @test_frem(float %a, i1 %cmp) {
+; CHECK-LABEL: define <2 x float> @test_frem(
+; CHECK-SAME: float [[A:%.*]], i1 [[CMP:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = frem float [[A]], 3.000000e+00
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[CMP]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP2]]
+; CHECK-NEXT:    ret <2 x float> [[TMP3]]
+;
+  %1 = frem float %a, 3.000000e+00
+  %2 = insertelement <2 x float> poison, float %1, i64 1
+  %3 = select i1 %cmp, <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> %2
+  ret <2 x float> %3
+}
+
+define <2 x float> @replace_through_casts(i16 %inp) {
+; CHECK-LABEL: define <2 x float> @replace_through_casts(
+; CHECK-SAME: i16 [[INP:%.*]]) {
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i16 [[INP]], -10
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i16> [[TMP1]], i16 [[ADD]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = uitofp <2 x i16> [[TMP2]] to <2 x float>
+; CHECK-NEXT:    [[TMP4:%.*]] = sitofp <2 x i16> [[TMP2]] to <2 x float>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %add = add nsw i16 %inp, -10
+  %1 = uitofp i16 %inp to float
+  %2 = sitofp i16 %add to float
+  %3 = insertelement <2 x float> poison, float %1, i64 0
+  %r = insertelement <2 x float> %3, float %2, i64 1
+  ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_and_binop(i16 %inp) {
+; CHECK-LABEL: define <2 x float> @replace_through_casts_and_binop(
+; CHECK-SAME: i16 [[INP:%.*]]) {
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i16 [[INP]], -10
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i16 [[INP]], 5
+; CHECK-NEXT:    [[TMP1:%.*]] = uitofp i16 [[MUL]] to float
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], 2.000000e+00
+; CHECK-NEXT:    [[TMP3:%.*]] = sitofp i16 [[ADD]] to float
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %add = add nsw i16 %inp, -10
+  %mul = mul nsw i16 %inp, 5
+  %1 = uitofp i16 %mul to float
+  %2 = fadd float %1, 2.000000e+00
+  %3 = sitofp i16 %add to float
+  %4 = insertelement <2 x float> poison, float %2, i64 0
+  %r = insertelement <2 x float> %4, float %3, i64 1
+  ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_and_binop_and_unop(i16 %inp) {
+; CHECK-LABEL: define <2 x float> @replace_through_casts_and_binop_and_unop(
+; CHECK-SAME: i16 [[INP:%.*]]) {
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i16 [[INP]], -10
+; CHECK-NEXT:    [[TMP1:%.*]] = sitofp i16 [[ADD]] to float
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg float [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i16 [[ADD]] to float
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd float [[TMP3]], 2.000000e+00
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP2]], i64 1
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %add = add nsw i16 %inp, -10
+  %1 = sitofp i16 %add to float
+  %2 = fneg float %1
+  %3 = uitofp i16 %add to float
+  %4 = fadd float %3, 2.000000e+00
+  %5 = insertelement <2 x float> poison, float %4, i64 0
+  %r = insertelement <2 x float> %5, float %2, i64 1
+  ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_through_splat(i16 %inp) {
+; CHECK-LABEL: define <2 x float> @replace_through_casts_through_splat(
+; CHECK-SAME: i16 [[INP:%.*]]) {
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i16 [[INP]], -10
+; CHECK-NEXT:    [[TMP1:%.*]] = uitofp i16 [[ADD]] to float
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], 2.000000e+00
+; CHECK-NEXT:    [[TMP3:%.*]] = sitofp i16 [[ADD]] to float
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg float [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %add = add nsw i16 %inp, -10
+  %1 = uitofp i16 %add to float
+  %2 = fadd float %1, 2.000000e+00
+  %3 = sitofp i16 %add to float
+  %4 = fneg float %3
+  %5 = insertelement <2 x float> poison, float %2, i64 0
+  %r = insertelement <2 x float> %5, float %4, i64 1
+  ret <2 x float> %r
+}
+
+define <2 x i32> @replace_through_int_casts(i16 %inp, <2 x i16> %dead) {
+; CHECK-LABEL: define <2 x i32> @replace_through_int_casts(
+; CHECK-SAME: i16 [[INP:%.*]], <2 x i16> [[DEAD:%.*]]) {
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i16 [[INP]], -10
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i16> [[TMP1]], i16 [[ADD]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i32>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %add = add nsw i16 %inp, -10
+  %1 = zext i16 %inp to i32
+  %2 = sext i16 %add to i32
+  %3 = insertelement <2 x i32> poison, i32 %1, i64 0
+  %r = insertelement <2 x i32> %3, i32 %2, i64 1
+  ret <2 x i32> %r
+}
+
+define <2 x i32> @replace_through_int_casts_ele0_only(i16 %inp, <2 x i16> %dead) {
+; CHECK-LABEL: define <2 x i32> @replace_through_int_casts_ele0_only(
+; CHECK-SAME: i16 [[INP:%.*]], <2 x i16> [[DEAD:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i32>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %2 = sext i16 %inp to i32
+  %4 = zext i16 %inp to i32
+  %5 = insertelement <2 x i32> poison, i32 %4, i64 0
+  %r = insertelement <2 x i32> %5, i32 %2, i64 1
+  ret <2 x i32> %r
+}
+
+define <2 x i8> @replace_through_binop_fail_cant_speculate(i8 %inp, <2 x i8> %d, <2 x i8> %any) {
+; CHECK-LABEL: define <2 x i8> @replace_through_binop_fail_cant_speculate(
+; CHECK-SAME: i8 [[INP:%.*]], <2 x i8> [[D:%.*]], <2 x i8> [[ANY:%.*]]) {
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[INP]], 5
+; CHECK-NEXT:    [[V0:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i64 0
+; CHECK-NEXT:    [[V:%.*]] = insertelement <2 x i8> [[V0]], i8 [[ADD]], i64 1
+; CHECK-NEXT:    [[DIV0:%.*]] = sdiv <2 x i8> <i8 -128, i8 -128>, [[V]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[INP]], 123
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i8> [[DIV0]], i8 [[TMP1]], i64 0
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %add = add i8 %inp, 5
+  %v0 = insertelement <2 x i8> poison, i8 %inp, i64 0
+  %v = insertelement <2 x i8> %v0, i8 %add, i64 1
+  %div0 = sdiv <2 x i8> <i8 -128, i8 -128>, %v
+  %1 = xor i8 %inp, 123
+  %r = insertelement <2 x i8> %div0, i8 %1, i64 0
+  ret <2 x i8> %r
+}
+
+define <2 x i8> @replace_through_binop_preserve_flags(i8 %inp, <2 x i8> %d, <2 x i8> %any) {
+; CHECK-LABEL: define <2 x i8> @replace_through_binop_preserve_flags(
+; CHECK-SAME: i8 [[INP:%.*]], <2 x i8> [[D:%.*]], <2 x i8> [[ANY:%.*]]) {
+; CHECK-NEXT:    [[ADD:%.*]] = xor i8 [[INP]], 5
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[ADD]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <2 x i8> [[TMP2]], <i8 123, i8 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i8> [[TMP2]], <i8 123, i8 1>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %add = xor i8 %inp, 5
+  %1 = xor i8 %inp, 123
+  %2 = add nsw i8 %add, 1
+  %3 = insertelement <2 x i8> poison, i8 %1, i64 0
+  %r = insertelement <2 x i8> %3, i8 %2, i64 1
+  ret <2 x i8> %r
+}
-- 
cgit v1.2.3


From 2377beba8d10ce1092db7f8ddd5b10a2c0d3bfd1 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn@google.com>
Date: Tue, 12 Mar 2024 13:33:12 -0700
Subject: [Fuchsia] Add LLDB_TEST_USE_VENDOR_PACKAGES to boostrap passthrough

---
 clang/cmake/caches/Fuchsia.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/cmake/caches/Fuchsia.cmake b/clang/cmake/caches/Fuchsia.cmake
index fe925901eb3d..1209fd935986 100644
--- a/clang/cmake/caches/Fuchsia.cmake
+++ b/clang/cmake/caches/Fuchsia.cmake
@@ -65,6 +65,7 @@ set(_FUCHSIA_BOOTSTRAP_PASSTHROUGH
   LLDB_EMBED_PYTHON_HOME
   LLDB_PYTHON_HOME
   LLDB_PYTHON_RELATIVE_PATH
+  LLDB_TEST_USE_VENDOR_PACKAGES
   Python3_EXECUTABLE
   Python3_LIBRARIES
   Python3_INCLUDE_DIRS
-- 
cgit v1.2.3


From 54f631d11640732af0dc9c2aa53af4e118d9fe36 Mon Sep 17 00:00:00 2001
From: "S. Bharadwaj Yadavalli" <Bharadwaj.Yadavalli@microsoft.com>
Date: Tue, 12 Mar 2024 16:51:18 -0400
Subject: [DirectX][NFC] Model precise overload type specification of DXIL Ops
 (#83917)

Implement an abstraction to specify precise overload types supported by
DXIL ops. These overload types are typically a subset of LLVM
intrinsics.

Implement the corresponding changes in DXILEmitter backend.

Add tests to verify expected errors for unsupported overload types at
code generation time.

Add tests to check for correct overload error output.
---
 llvm/lib/Target/DirectX/DXIL.td           |  55 ++++++++++--
 llvm/lib/Target/DirectX/DXILOpBuilder.cpp |   4 +-
 llvm/test/CodeGen/DirectX/exp2_error.ll   |  13 +++
 llvm/test/CodeGen/DirectX/frac.ll         |   3 -
 llvm/test/CodeGen/DirectX/frac_error.ll   |  14 +++
 llvm/test/CodeGen/DirectX/round_error.ll  |  13 +++
 llvm/test/CodeGen/DirectX/sin.ll          |  22 +----
 llvm/test/CodeGen/DirectX/sin_error.ll    |  14 +++
 llvm/utils/TableGen/DXILEmitter.cpp       | 140 +++++++++++++++++++++---------
 9 files changed, 203 insertions(+), 75 deletions(-)
 create mode 100644 llvm/test/CodeGen/DirectX/exp2_error.ll
 create mode 100644 llvm/test/CodeGen/DirectX/frac_error.ll
 create mode 100644 llvm/test/CodeGen/DirectX/round_error.ll
 create mode 100644 llvm/test/CodeGen/DirectX/sin_error.ll

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 9536a01e125b..66b0ef24332c 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -205,28 +205,71 @@ defset list<DXILOpClass> OpClasses = {
   def writeSamplerFeedbackBias : DXILOpClass;
   def writeSamplerFeedbackGrad : DXILOpClass;
   def writeSamplerFeedbackLevel: DXILOpClass;
+
+  // This is a sentinel definition. Hence placed at the end of the list
+  // and not as part of the above alphabetically sorted valid definitions.
+  // Additionally it is capitalized unlike all the others.
+  def UnknownOpClass: DXILOpClass;
+}
+
+// Several of the overloaded DXIL Operations support for data types
+// that are a subset of the overloaded LLVM intrinsics that they map to.
+// For e.g., llvm.sin.* intrinsic operates on any floating-point type and
+// maps for lowering to DXIL Op Sin. However, valid overloads of DXIL Sin
+// operation overloads are half (f16) and float (f32) only.
+//
+// The following abstracts overload types specific to DXIL operations.
+
+class DXILType : LLVMType<OtherVT> {
+  let isAny = 1;
+  int isI16OrI32 = 0;
+  int isHalfOrFloat = 0;
 }
 
+// Concrete records for various overload types supported specifically by
+// DXIL Operations.
+let isI16OrI32 = 1 in
+  def llvm_i16ori32_ty : DXILType;
+
+let isHalfOrFloat = 1 in
+  def llvm_halforfloat_ty : DXILType;
+
 // Abstraction DXIL Operation to LLVM intrinsic
-class DXILOpMapping<int opCode, DXILOpClass opClass, Intrinsic intrinsic, string doc> {
+class DXILOpMappingBase {
+  int OpCode = 0;                      // Opcode of DXIL Operation
+  DXILOpClass OpClass = UnknownOpClass;// Class of DXIL Operation.
+  Intrinsic LLVMIntrinsic = ?;         // LLVM Intrinsic DXIL Operation maps to
+  string Doc = "";                     // A short description of the operation
+  list<LLVMType> OpTypes = ?;          // Valid types of DXIL Operation in the
+                                       // format [returnTy, param1ty, ...]
+}
+
+class DXILOpMapping<int opCode, DXILOpClass opClass,
+                    Intrinsic intrinsic, string doc,
+                    list<LLVMType> opTys = []> : DXILOpMappingBase {
   int OpCode = opCode;                 // Opcode corresponding to DXIL Operation
-  DXILOpClass OpClass = opClass;             // Class of DXIL Operation.
+  DXILOpClass OpClass = opClass;       // Class of DXIL Operation.
   Intrinsic LLVMIntrinsic = intrinsic; // LLVM Intrinsic the DXIL Operation maps
   string Doc = doc;                    // to a short description of the operation
+  list<LLVMType> OpTypes = !if(!eq(!size(opTys), 0), LLVMIntrinsic.Types, opTys);
 }
 
 // Concrete definition of DXIL Operation mapping to corresponding LLVM intrinsic
 def Sin  : DXILOpMapping<13, unary, int_sin,
-                         "Returns sine(theta) for theta in radians.">;
+                         "Returns sine(theta) for theta in radians.",
+                         [llvm_halforfloat_ty, LLVMMatchType<0>]>;
 def Exp2 : DXILOpMapping<21, unary, int_exp2,
                          "Returns the base 2 exponential, or 2**x, of the specified value."
-                         "exp2(x) = 2**x.">;
+                         "exp2(x) = 2**x.",
+                         [llvm_halforfloat_ty, LLVMMatchType<0>]>;
 def Frac : DXILOpMapping<22, unary, int_dx_frac,
                          "Returns a fraction from 0 to 1 that represents the "
-                         "decimal part of the input.">;
+                         "decimal part of the input.",
+                         [llvm_halforfloat_ty, LLVMMatchType<0>]>;
 def Round : DXILOpMapping<26, unary, int_round,
                          "Returns the input rounded to the nearest integer"
-                         "within a floating-point type.">;
+                         "within a floating-point type.",
+                         [llvm_halforfloat_ty, LLVMMatchType<0>]>;
 def UMax : DXILOpMapping<39, binary, int_umax,
                          "Unsigned integer maximum. UMax(a,b) = a > b ? a : b">;
 def FMad : DXILOpMapping<46, tertiary, int_fmuladd,
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index 21a20d45b922..11b24d044923 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -254,10 +254,8 @@ static FunctionCallee getOrCreateDXILOpFunction(dxil::OpCode DXILOp,
   const OpCodeProperty *Prop = getOpCodeProperty(DXILOp);
 
   OverloadKind Kind = getOverloadKind(OverloadTy);
-  // FIXME: find the issue and report error in clang instead of check it in
-  // backend.
   if ((Prop->OverloadTys & (uint16_t)Kind) == 0) {
-    llvm_unreachable("invalid overload");
+    report_fatal_error("Invalid Overload Type", /* gen_crash_diag=*/false);
   }
 
   std::string FnName = constructOverloadName(Kind, OverloadTy, *Prop);
diff --git a/llvm/test/CodeGen/DirectX/exp2_error.ll b/llvm/test/CodeGen/DirectX/exp2_error.ll
new file mode 100644
index 000000000000..6b9126785fd4
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/exp2_error.ll
@@ -0,0 +1,13 @@
+; RUN: not opt -S -dxil-op-lower %s 2>&1 | FileCheck %s
+
+; DXIL operation exp2 does not support double overload type
+; CHECK: LLVM ERROR: Invalid Overload
+
+define noundef double @exp2_double(double noundef %a) #0 {
+entry:
+  %a.addr = alloca double, align 8
+  store double %a, ptr %a.addr, align 8
+  %0 = load double, ptr %a.addr, align 8
+  %elt.exp2 = call double @llvm.exp2.f64(double %0)
+  ret double %elt.exp2
+}
diff --git a/llvm/test/CodeGen/DirectX/frac.ll b/llvm/test/CodeGen/DirectX/frac.ll
index ab605ed6084a..ae86fe06654d 100644
--- a/llvm/test/CodeGen/DirectX/frac.ll
+++ b/llvm/test/CodeGen/DirectX/frac.ll
@@ -29,6 +29,3 @@ entry:
   %dx.frac = call half @llvm.dx.frac.f16(half %0)
   ret half %dx.frac
 }
-
-; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare half @llvm.dx.frac.f16(half) #1
diff --git a/llvm/test/CodeGen/DirectX/frac_error.ll b/llvm/test/CodeGen/DirectX/frac_error.ll
new file mode 100644
index 000000000000..ebce76105ad4
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/frac_error.ll
@@ -0,0 +1,14 @@
+; RUN: not opt -S -dxil-op-lower %s 2>&1 | FileCheck %s
+
+; DXIL operation frac does not support double overload type
+; CHECK: LLVM ERROR: Invalid Overload Type
+
+; Function Attrs: noinline nounwind optnone
+define noundef double @frac_double(double noundef %a) #0 {
+entry:
+  %a.addr = alloca double, align 8
+  store double %a, ptr %a.addr, align 8
+  %0 = load double, ptr %a.addr, align 8
+  %dx.frac = call double @llvm.dx.frac.f64(double %0)
+  ret double %dx.frac
+}
diff --git a/llvm/test/CodeGen/DirectX/round_error.ll b/llvm/test/CodeGen/DirectX/round_error.ll
new file mode 100644
index 000000000000..3bd87b2bbf02
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/round_error.ll
@@ -0,0 +1,13 @@
+; RUN: not opt -S -dxil-op-lower %s 2>&1 | FileCheck %s
+
+; This test is expected to fail with the following error
+; CHECK: LLVM ERROR: Invalid Overload Type
+
+define noundef double @round_double(double noundef %a) #0 {
+entry:
+  %a.addr = alloca double, align 8
+  store double %a, ptr %a.addr, align 8
+  %0 = load double, ptr %a.addr, align 8
+  %elt.round = call double @llvm.round.f64(double %0)
+  ret double %elt.round
+}
diff --git a/llvm/test/CodeGen/DirectX/sin.ll b/llvm/test/CodeGen/DirectX/sin.ll
index bb31d28bfcfe..1f285c433581 100644
--- a/llvm/test/CodeGen/DirectX/sin.ll
+++ b/llvm/test/CodeGen/DirectX/sin.ll
@@ -4,11 +4,8 @@
 ; CHECK:call float @dx.op.unary.f32(i32 13, float %{{.*}})
 ; CHECK:call half @dx.op.unary.f16(i32 13, half %{{.*}})
 
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-pc-shadermodel6.7-library"
-
 ; Function Attrs: noinline nounwind optnone
-define noundef float @_Z3foof(float noundef %a) #0 {
+define noundef float @sin_float(float noundef %a) #0 {
 entry:
   %a.addr = alloca float, align 4
   store float %a, ptr %a.addr, align 4
@@ -17,11 +14,8 @@ entry:
   ret float %1
 }
 
-; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.sin.f32(float) #1
-
 ; Function Attrs: noinline nounwind optnone
-define noundef half @_Z3barDh(half noundef %a) #0 {
+define noundef half @sin_half(half noundef %a) #0 {
 entry:
   %a.addr = alloca half, align 2
   store half %a, ptr %a.addr, align 2
@@ -29,15 +23,3 @@ entry:
   %1 = call half @llvm.sin.f16(half %0)
   ret half %1
 }
-
-; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare half @llvm.sin.f16(half) #1
-
-attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git 73417c517644db5c419c85c0b3cb6750172fcab5)"}
diff --git a/llvm/test/CodeGen/DirectX/sin_error.ll b/llvm/test/CodeGen/DirectX/sin_error.ll
new file mode 100644
index 000000000000..ece0e530315b
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/sin_error.ll
@@ -0,0 +1,14 @@
+; RUN: not opt -S -dxil-op-lower %s 2>&1 | FileCheck %s
+
+; DXIL operation sin does not support double overload type
+; CHECK: LLVM ERROR: Invalid Overload
+
+define noundef double @sin_double(double noundef %a) #0 {
+entry:
+  %a.addr = alloca double, align 8
+  store double %a, ptr %a.addr, align 8
+  %0 = load double, ptr %a.addr, align 8
+  %1 = call double @llvm.sin.f64(double %0)
+  ret double %1
+}
+
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index fc958f532873..59089929837e 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Support/DXILABI.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
+#include <string>
 
 using namespace llvm;
 using namespace llvm::dxil;
@@ -38,8 +39,8 @@ struct DXILOperationDesc {
   int OpCode;         // ID of DXIL operation
   StringRef OpClass;  // name of the opcode class
   StringRef Doc;      // the documentation description of this instruction
-  SmallVector<MVT::SimpleValueType> OpTypes; // Vector of operand types -
-                                             // return type is at index 0
+  SmallVector<Record *> OpTypes; // Vector of operand type records -
+                                 // return type is at index 0
   SmallVector<std::string>
       OpAttributes;     // operation attribute represented as strings
   StringRef Intrinsic;  // The llvm intrinsic map to OpName. Default is "" which
@@ -57,20 +58,21 @@ struct DXILOperationDesc {
   DXILShaderModel ShaderModel;           // minimum shader model required
   DXILShaderModel ShaderModelTranslated; // minimum shader model required with
                                          // translation by linker
-  int OverloadParamIndex; // parameter index which control the overload.
-                          // When < 0, should be only 1 overload type.
+  int OverloadParamIndex;             // Index of parameter with overload type.
+                                      //   -1 : no overload types
   SmallVector<StringRef, 4> counters; // counters for this inst.
   DXILOperationDesc(const Record *);
 };
 } // end anonymous namespace
 
-/// Convert DXIL type name string to dxil::ParameterKind
+/// Return dxil::ParameterKind corresponding to input LLVMType record
 ///
-/// \param VT Simple Value Type
+/// \param R TableGen def record of class LLVMType
 /// \return ParameterKind As defined in llvm/Support/DXILABI.h
 
-static ParameterKind getParameterKind(MVT::SimpleValueType VT) {
-  switch (VT) {
+static ParameterKind getParameterKind(const Record *R) {
+  auto VTRec = R->getValueAsDef("VT");
+  switch (getValueType(VTRec)) {
   case MVT::isVoid:
     return ParameterKind::VOID;
   case MVT::f16:
@@ -90,6 +92,12 @@ static ParameterKind getParameterKind(MVT::SimpleValueType VT) {
   case MVT::fAny:
   case MVT::iAny:
     return ParameterKind::OVERLOAD;
+  case MVT::Other:
+    // Handle DXIL-specific overload types
+    if (R->getValueAsInt("isHalfOrFloat") || R->getValueAsInt("isI16OrI32")) {
+      return ParameterKind::OVERLOAD;
+    }
+    LLVM_FALLTHROUGH;
   default:
     llvm_unreachable("Support for specified DXIL Type not yet implemented");
   }
@@ -106,45 +114,80 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
 
   Doc = R->getValueAsString("Doc");
 
+  auto TypeRecs = R->getValueAsListOfDefs("OpTypes");
+  unsigned TypeRecsSize = TypeRecs.size();
+  // Populate OpTypes with return type and parameter types
+
+  // Parameter indices of overloaded parameters.
+  // This vector contains overload parameters in the order order used to
+  // resolve an LLVMMatchType in accordance with  convention outlined in
+  // the comment before the definition of class LLVMMatchType in
+  // llvm/IR/Intrinsics.td
+  SmallVector<int> OverloadParamIndices;
+  for (unsigned i = 0; i < TypeRecsSize; i++) {
+    auto TR = TypeRecs[i];
+    // Track operation parameter indices of any overload types
+    auto isAny = TR->getValueAsInt("isAny");
+    if (isAny == 1) {
+      // TODO: At present it is expected that all overload types in a DXIL Op
+      // are of the same type. Hence, OverloadParamIndices will have only one
+      // element. This implies we do not need a vector. However, until more
+      // (all?) DXIL Ops are added in DXIL.td, a vector is being used to flag
+      // cases this assumption would not hold.
+      if (!OverloadParamIndices.empty()) {
+        bool knownType = true;
+        // Ensure that the same overload type registered earlier is being used
+        for (auto Idx : OverloadParamIndices) {
+          if (TR != TypeRecs[Idx]) {
+            knownType = false;
+            break;
+          }
+        }
+        if (!knownType) {
+          report_fatal_error("Specification of multiple differing overload "
+                             "parameter types not yet supported",
+                             false);
+        }
+      } else {
+        OverloadParamIndices.push_back(i);
+      }
+    }
+    // Populate OpTypes array according to the type specification
+    if (TR->isAnonymous()) {
+      // Check prior overload types exist
+      assert(!OverloadParamIndices.empty() &&
+             "No prior overloaded parameter found to match.");
+      // Get the parameter index of anonymous type, TR, references
+      auto OLParamIndex = TR->getValueAsInt("Number");
+      // Resolve and insert the type to that at OLParamIndex
+      OpTypes.emplace_back(TypeRecs[OLParamIndex]);
+    } else {
+      // A non-anonymous type. Just record it in OpTypes
+      OpTypes.emplace_back(TR);
+    }
+  }
+
+  // Set the index of the overload parameter, if any.
+  OverloadParamIndex = -1; // default; indicating none
+  if (!OverloadParamIndices.empty()) {
+    if (OverloadParamIndices.size() > 1)
+      report_fatal_error("Multiple overload type specification not supported",
+                         false);
+    OverloadParamIndex = OverloadParamIndices[0];
+  }
+  // Get the operation class
+  OpClass = R->getValueAsDef("OpClass")->getName();
+
   if (R->getValue("LLVMIntrinsic")) {
     auto *IntrinsicDef = R->getValueAsDef("LLVMIntrinsic");
     auto DefName = IntrinsicDef->getName();
     assert(DefName.starts_with("int_") && "invalid intrinsic name");
     // Remove the int_ from intrinsic name.
     Intrinsic = DefName.substr(4);
-    // TODO: It is expected that return type and parameter types of
-    // DXIL Operation are the same as that of the intrinsic. Deviations
-    // are expected to be encoded in TableGen record specification and
-    // handled accordingly here. Support to be added later, as needed.
-    // Get parameter type list of the intrinsic. Types attribute contains
-    // the list of as [returnType, param1Type,, param2Type, ...]
-
-    OverloadParamIndex = -1;
-    auto TypeRecs = IntrinsicDef->getValueAsListOfDefs("Types");
-    unsigned TypeRecsSize = TypeRecs.size();
-    // Populate return type and parameter type names
-    for (unsigned i = 0; i < TypeRecsSize; i++) {
-      auto TR = TypeRecs[i];
-      OpTypes.emplace_back(getValueType(TR->getValueAsDef("VT")));
-      // Get the overload parameter index.
-      // TODO : Seems hacky. Is it possible that more than one parameter can
-      // be of overload kind??
-      // TODO: Check for any additional constraints specified for DXIL operation
-      // restricting return type.
-      if (i > 0) {
-        auto &CurParam = OpTypes.back();
-        if (getParameterKind(CurParam) >= ParameterKind::OVERLOAD) {
-          OverloadParamIndex = i;
-        }
-      }
-    }
-    // Get the operation class
-    OpClass = R->getValueAsDef("OpClass")->getName();
-
-    // NOTE: For now, assume that attributes of DXIL Operation are the same as
+    // TODO: For now, assume that attributes of DXIL Operation are the same as
     // that of the intrinsic. Deviations are expected to be encoded in TableGen
     // record specification and handled accordingly here. Support to be added
-    // later.
+    // as needed.
     auto IntrPropList = IntrinsicDef->getValueAsListInit("IntrProperties");
     auto IntrPropListSize = IntrPropList->size();
     for (unsigned i = 0; i < IntrPropListSize; i++) {
@@ -191,12 +234,13 @@ static std::string getParameterKindStr(ParameterKind Kind) {
 }
 
 /// Return a string representation of OverloadKind enum that maps to
-/// input Simple Value Type enum
-/// \param VT Simple Value Type enum
+/// input LLVMType record
+/// \param R TableGen def record of class LLVMType
 /// \return std::string string representation of OverloadKind
 
-static std::string getOverloadKindStr(MVT::SimpleValueType VT) {
-  switch (VT) {
+static std::string getOverloadKindStr(const Record *R) {
+  auto VTRec = R->getValueAsDef("VT");
+  switch (getValueType(VTRec)) {
   case MVT::isVoid:
     return "OverloadKind::VOID";
   case MVT::f16:
@@ -219,6 +263,16 @@ static std::string getOverloadKindStr(MVT::SimpleValueType VT) {
     return "OverloadKind::I16 | OverloadKind::I32 | OverloadKind::I64";
   case MVT::fAny:
     return "OverloadKind::HALF | OverloadKind::FLOAT | OverloadKind::DOUBLE";
+  case MVT::Other:
+    // Handle DXIL-specific overload types
+    {
+      if (R->getValueAsInt("isHalfOrFloat")) {
+        return "OverloadKind::HALF | OverloadKind::FLOAT";
+      } else if (R->getValueAsInt("isI16OrI32")) {
+        return "OverloadKind::I16 | OverloadKind::I32";
+      }
+    }
+    LLVM_FALLTHROUGH;
   default:
     llvm_unreachable(
         "Support for specified parameter OverloadKind not yet implemented");
-- 
cgit v1.2.3


From e9492ccae085b5feb850ff17a96fe8211f7f6d7d Mon Sep 17 00:00:00 2001
From: Jason Eckhardt <jeckhardt@nvidia.com>
Date: Tue, 12 Mar 2024 16:01:58 -0500
Subject: [TableGen] DecoderEmitter clean-ups and modernization. (#84832)

The decoder emitter is showing some signs of age. This patch makes a few
kinds of clean-ups:
- Use ranged-for more widely, including using enumerate() for those
loops maintaining a loop index along with the items.
- Reduce the number of arguments to fieldFromInsn (removes an out
reference parameter: CodingStandards). The insn_t argument to insnWithID
can/should probably be removed soon too since modern C++ allows us to
return a local container without a copy.
- Use raw strings for the large emitted code segments. This enhances
both readability and modifiability.
---
 llvm/utils/TableGen/DecoderEmitter.cpp | 653 ++++++++++++++++-----------------
 1 file changed, 317 insertions(+), 336 deletions(-)

diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 88f245238138..dd78dc02159b 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -226,7 +226,7 @@ static BitsInit &getBitsField(const Record &def, StringRef str) {
   VarLenInst VLI = VarLenInst(cast<DagInit>(RV->getValue()), RV);
   SmallVector<Init *, 16> Bits;
 
-  for (auto &SI : VLI) {
+  for (const auto &SI : VLI) {
     if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value)) {
       for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) {
         Bits.push_back(BI->getBit(Idx));
@@ -441,16 +441,15 @@ public:
 protected:
   // Populates the insn given the uid.
   void insnWithID(insn_t &Insn, unsigned Opcode) const {
-    BitsInit &Bits = getBitsField(*AllInstructions[Opcode].EncodingDef, "Inst");
-    Insn.resize(BitWidth > Bits.getNumBits() ? BitWidth : Bits.getNumBits(),
-                BIT_UNSET);
+    const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
+    BitsInit &Bits = getBitsField(*EncodingDef, "Inst");
+    Insn.resize(std::max(BitWidth, Bits.getNumBits()), BIT_UNSET);
     // We may have a SoftFail bitmask, which specifies a mask where an encoding
     // may differ from the value in "Inst" and yet still be valid, but the
     // disassembler should return SoftFail instead of Success.
     //
     // This is used for marking UNPREDICTABLE instructions in the ARM world.
-    const RecordVal *RV =
-        AllInstructions[Opcode].EncodingDef->getValue("SoftFail");
+    const RecordVal *RV = EncodingDef->getValue("SoftFail");
     const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
     for (unsigned i = 0; i < Bits.getNumBits(); ++i) {
       if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE)
@@ -472,10 +471,11 @@ protected:
   // Populates the field of the insn given the start position and the number of
   // consecutive bits to scan for.
   //
-  // Returns false if there exists any uninitialized bit value in the range.
-  // Returns true, otherwise.
-  bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit,
-                     unsigned NumBits) const;
+  // Returns a pair of values (indicator, field), where the indicator is false
+  // if there exists any uninitialized bit value in the range and true if all
+  // bits are well-known. The second value is the potentially populated field.
+  std::pair<bool, uint64_t> fieldFromInsn(const insn_t &Insn, unsigned StartBit,
+                                          unsigned NumBits) const;
 
   /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
   /// filter array as a series of chars.
@@ -581,26 +581,25 @@ Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
   NumFiltered = 0;
   LastOpcFiltered = {0, 0};
 
-  for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
+  for (const auto &OpcPair : Owner->Opcodes) {
     insn_t Insn;
 
     // Populates the insn given the uid.
-    Owner->insnWithID(Insn, Owner->Opcodes[i].EncodingID);
+    Owner->insnWithID(Insn, OpcPair.EncodingID);
 
-    uint64_t Field;
     // Scans the segment for possibly well-specified encoding bits.
-    bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits);
+    auto [Ok, Field] = Owner->fieldFromInsn(Insn, StartBit, NumBits);
 
-    if (ok) {
+    if (Ok) {
       // The encoding bits are well-known.  Lets add the uid of the
       // instruction into the bucket keyed off the constant field value.
-      LastOpcFiltered = Owner->Opcodes[i];
+      LastOpcFiltered = OpcPair;
       FilteredInstructions[Field].push_back(LastOpcFiltered);
       ++NumFiltered;
     } else {
       // Some of the encoding bit(s) are unspecified.  This contributes to
       // one additional member of "Variable" instructions.
-      VariableInstructions.push_back(Owner->Opcodes[i]);
+      VariableInstructions.push_back(OpcPair);
     }
   }
 
@@ -699,7 +698,7 @@ void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
 
   size_t PrevFilter = 0;
   bool HasFallthrough = false;
-  for (auto &Filter : FilterChooserMap) {
+  for (const auto &Filter : FilterChooserMap) {
     // Field value -1 implies a non-empty set of variable instructions.
     // See also recurse().
     if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) {
@@ -784,7 +783,7 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
   // is used below to index into NumberedEncodings.
   DenseMap<unsigned, unsigned> OpcodeToEncodingID;
   OpcodeToEncodingID.reserve(EncodingIDs.size());
-  for (auto &EI : EncodingIDs)
+  for (const auto &EI : EncodingIDs)
     OpcodeToEncodingID[EI.Opcode] = EI.EncodingID;
 
   OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace
@@ -1038,27 +1037,29 @@ void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
   }
   OS.indent(Indentation) << "}\n";
   Indentation -= 2;
-  OS.indent(Indentation) << "}\n\n";
+  OS.indent(Indentation) << "}\n";
 }
 
 // Populates the field of the insn given the start position and the number of
 // consecutive bits to scan for.
 //
-// Returns false if and on the first uninitialized bit value encountered.
-// Returns true, otherwise.
-bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
-                                  unsigned StartBit, unsigned NumBits) const {
-  Field = 0;
+// Returns a pair of values (indicator, field), where the indicator is false
+// if there exists any uninitialized bit value in the range and true if all
+// bits are well-known. The second value is the potentially populated field.
+std::pair<bool, uint64_t> FilterChooser::fieldFromInsn(const insn_t &Insn,
+                                                       unsigned StartBit,
+                                                       unsigned NumBits) const {
+  uint64_t Field = 0;
 
   for (unsigned i = 0; i < NumBits; ++i) {
     if (Insn[StartBit + i] == BIT_UNSET)
-      return false;
+      return {false, Field};
 
     if (Insn[StartBit + i] == BIT_TRUE)
       Field = Field | (1ULL << i);
   }
 
-  return true;
+  return {true, Field};
 }
 
 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
@@ -1246,14 +1247,14 @@ unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, unsigned Opc,
 // If ParenIfBinOp is true, print a surrounding () if Val uses && or ||.
 bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
                                           raw_ostream &OS) const {
-  if (auto *D = dyn_cast<DefInit>(&Val)) {
+  if (const auto *D = dyn_cast<DefInit>(&Val)) {
     if (!D->getDef()->isSubClassOf("SubtargetFeature"))
       return true;
     OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString()
        << "]";
     return false;
   }
-  if (auto *D = dyn_cast<DagInit>(&Val)) {
+  if (const auto *D = dyn_cast<DagInit>(&Val)) {
     std::string Op = D->getOperator()->getAsString();
     if (Op == "not" && D->getNumArgs() == 1) {
       OS << '!';
@@ -1350,9 +1351,9 @@ void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
   encodeULEB128(PIdx, S);
 
   TableInfo.Table.push_back(MCD::OPC_CheckPredicate);
-  // Predicate index
-  for (unsigned i = 0, e = PBytes.size(); i != e; ++i)
-    TableInfo.Table.push_back(PBytes[i]);
+  // Predicate index.
+  for (const auto PB : PBytes)
+    TableInfo.Table.push_back(PB);
   // Push location for NumToSkip backpatching.
   TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
   TableInfo.Table.push_back(0);
@@ -1362,13 +1363,13 @@ void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
 
 void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
                                            unsigned Opc) const {
-  const RecordVal *RV = AllInstructions[Opc].EncodingDef->getValue("SoftFail");
+  const Record *EncodingDef = AllInstructions[Opc].EncodingDef;
+  const RecordVal *RV = EncodingDef->getValue("SoftFail");
   BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
 
   if (!SFBits)
     return;
-  BitsInit *InstBits =
-      AllInstructions[Opc].EncodingDef->getValueAsBitsInit("Inst");
+  BitsInit *InstBits = EncodingDef->getValueAsBitsInit("Inst");
 
   APInt PositiveMask(BitWidth, 0ULL);
   APInt NegativeMask(BitWidth, 0ULL);
@@ -1495,9 +1496,9 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
   raw_svector_ostream S(Bytes);
   encodeULEB128(DIdx, S);
 
-  // Decoder index
-  for (unsigned i = 0, e = Bytes.size(); i != e; ++i)
-    TableInfo.Table.push_back(Bytes[i]);
+  // Decoder index.
+  for (const auto B : Bytes)
+    TableInfo.Table.push_back(B);
 
   if (!HasCompleteDecoder) {
     // Push location for NumToSkip backpatching.
@@ -1566,7 +1567,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
   if (AllowMixed && !Greedy) {
     assert(numInstructions == 3);
 
-    for (auto Opcode : Opcodes) {
+    for (const auto &Opcode : Opcodes) {
       std::vector<unsigned> StartBits;
       std::vector<unsigned> EndBits;
       std::vector<uint64_t> FieldVals;
@@ -1613,10 +1614,10 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
     else
       bitAttrs.push_back(ATTR_NONE);
 
-  for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
+  for (const auto &OpcPair : Opcodes) {
     insn_t insn;
 
-    insnWithID(insn, Opcodes[InsnIndex].EncodingID);
+    insnWithID(insn, OpcPair.EncodingID);
 
     for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
       switch (bitAttrs[BitIndex]) {
@@ -1760,14 +1761,14 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
   bool AllUseless = true;
   unsigned BestScore = 0;
 
-  for (unsigned i = 0, e = Filters.size(); i != e; ++i) {
-    unsigned Usefulness = Filters[i].usefulness();
+  for (const auto &[Idx, Filter] : enumerate(Filters)) {
+    unsigned Usefulness = Filter.usefulness();
 
     if (Usefulness)
       AllUseless = false;
 
     if (Usefulness > BestScore) {
-      BestIndex = i;
+      BestIndex = Idx;
       BestScore = Usefulness;
     }
   }
@@ -1892,8 +1893,7 @@ void parseVarLenInstOperand(const Record &Def,
   VarLenInst VLI(cast<DagInit>(RV->getValue()), RV);
   SmallVector<int> TiedTo;
 
-  for (unsigned Idx = 0; Idx < CGI.Operands.size(); ++Idx) {
-    auto &Op = CGI.Operands[Idx];
+  for (const auto &[Idx, Op] : enumerate(CGI.Operands)) {
     if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0)
       for (auto *Arg : Op.MIOperandInfo->getArgs())
         Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef()));
@@ -1909,7 +1909,7 @@ void parseVarLenInstOperand(const Record &Def,
   }
 
   unsigned CurrBitPos = 0;
-  for (auto &EncodingSegment : VLI) {
+  for (const auto &EncodingSegment : VLI) {
     unsigned Offset = 0;
     StringRef OpName;
 
@@ -2028,26 +2028,23 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
   std::vector<std::pair<Init *, StringRef>> InOutOperands;
   DagInit *Out = Def.getValueAsDag("OutOperandList");
   DagInit *In = Def.getValueAsDag("InOperandList");
-  for (unsigned i = 0; i < Out->getNumArgs(); ++i)
-    InOutOperands.push_back(std::pair(Out->getArg(i), Out->getArgNameStr(i)));
-  for (unsigned i = 0; i < In->getNumArgs(); ++i)
-    InOutOperands.push_back(std::pair(In->getArg(i), In->getArgNameStr(i)));
+  for (const auto &[Idx, Arg] : enumerate(Out->getArgs()))
+    InOutOperands.push_back(std::pair(Arg, Out->getArgNameStr(Idx)));
+  for (const auto &[Idx, Arg] : enumerate(In->getArgs()))
+    InOutOperands.push_back(std::pair(Arg, In->getArgNameStr(Idx)));
 
   // Search for tied operands, so that we can correctly instantiate
   // operands that are not explicitly represented in the encoding.
   std::map<std::string, std::string> TiedNames;
-  for (unsigned i = 0; i < CGI.Operands.size(); ++i) {
-    auto &Op = CGI.Operands[i];
-    for (unsigned j = 0; j < Op.Constraints.size(); ++j) {
-      const CGIOperandList::ConstraintInfo &CI = Op.Constraints[j];
+  for (const auto &[I, Op] : enumerate(CGI.Operands)) {
+    for (const auto &[J, CI] : enumerate(Op.Constraints)) {
       if (CI.isTied()) {
-        int tiedTo = CI.getTiedOperand();
         std::pair<unsigned, unsigned> SO =
-            CGI.Operands.getSubOperandNumber(tiedTo);
+            CGI.Operands.getSubOperandNumber(CI.getTiedOperand());
         std::string TiedName = CGI.Operands[SO.first].SubOpNames[SO.second];
         if (TiedName.empty())
           TiedName = CGI.Operands[SO.first].Name;
-        std::string MyName = Op.SubOpNames[j];
+        std::string MyName = Op.SubOpNames[J];
         if (MyName.empty())
           MyName = Op.Name;
 
@@ -2099,10 +2096,9 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
 
         // Decode each of the sub-ops separately.
         assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs());
-        for (unsigned i = 0; i < SubOps->getNumArgs(); ++i) {
-          StringRef SubOpName = SubArgDag->getArgNameStr(i);
-          OperandInfo SubOpInfo =
-              getOpInfo(cast<DefInit>(SubOps->getArg(i))->getDef());
+        for (const auto &[I, Arg] : enumerate(SubOps->getArgs())) {
+          StringRef SubOpName = SubArgDag->getArgNameStr(I);
+          OperandInfo SubOpInfo = getOpInfo(cast<DefInit>(Arg)->getDef());
 
           addOneOperandFields(EncodingDef, Bits, TiedNames, SubOpName,
                               SubOpInfo);
@@ -2169,273 +2165,253 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
 // using the VS compiler. It has a bug which causes the function
 // to be optimized out in some circumstances. See llvm.org/pr38292
 static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
-  OS << "// Helper functions for extracting fields from encoded instructions.\n"
-     << "// InsnType must either be integral or an APInt-like object that "
-        "must:\n"
-     << "// * be default-constructible and copy-constructible\n"
-     << "// * be constructible from an APInt (this can be private)\n"
-     << "// * Support insertBits(bits, startBit, numBits)\n"
-     << "// * Support extractBitsAsZExtValue(numBits, startBit)\n"
-     << "// * Support the ~, &, ==, and != operators with other objects of "
-        "the same type\n"
-     << "// * Support the != and bitwise & with uint64_t\n"
-     << "// * Support put (<<) to raw_ostream&\n"
-     << "template <typename InsnType>\n"
-     << "#if defined(_MSC_VER) && !defined(__clang__)\n"
-     << "__declspec(noinline)\n"
-     << "#endif\n"
-     << "static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>\n"
-     << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n"
-     << "                     unsigned numBits) {\n"
-     << "  assert(startBit + numBits <= 64 && \"Cannot support >64-bit "
-        "extractions!\");\n"
-     << "  assert(startBit + numBits <= (sizeof(InsnType) * 8) &&\n"
-     << "         \"Instruction field out of bounds!\");\n"
-     << "  InsnType fieldMask;\n"
-     << "  if (numBits == sizeof(InsnType) * 8)\n"
-     << "    fieldMask = (InsnType)(-1LL);\n"
-     << "  else\n"
-     << "    fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n"
-     << "  return (insn & fieldMask) >> startBit;\n"
-     << "}\n"
-     << "\n"
-     << "template <typename InsnType>\n"
-     << "static std::enable_if_t<!std::is_integral<InsnType>::value, "
-        "uint64_t>\n"
-     << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n"
-     << "                     unsigned numBits) {\n"
-     << "  return insn.extractBitsAsZExtValue(numBits, startBit);\n"
-     << "}\n\n";
+  OS << R"(
+// Helper functions for extracting fields from encoded instructions.
+// InsnType must either be integral or an APInt-like object that must:
+// * be default-constructible and copy-constructible
+// * be constructible from an APInt (this can be private)
+// * Support insertBits(bits, startBit, numBits)
+// * Support extractBitsAsZExtValue(numBits, startBit)
+// * Support the ~, &, ==, and != operators with other objects of the same type
+// * Support the != and bitwise & with uint64_t
+// * Support put (<<) to raw_ostream&
+template <typename InsnType>
+#if defined(_MSC_VER) && !defined(__clang__)
+__declspec(noinline)
+#endif
+static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>
+fieldFromInstruction(const InsnType &insn, unsigned startBit,
+                     unsigned numBits) {
+  assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!");
+  assert(startBit + numBits <= (sizeof(InsnType) * 8) &&
+         "Instruction field out of bounds!");
+  InsnType fieldMask;
+  if (numBits == sizeof(InsnType) * 8)
+    fieldMask = (InsnType)(-1LL);
+  else
+    fieldMask = (((InsnType)1 << numBits) - 1) << startBit;
+  return (insn & fieldMask) >> startBit;
+}
+
+template <typename InsnType>
+static std::enable_if_t<!std::is_integral<InsnType>::value, uint64_t>
+fieldFromInstruction(const InsnType &insn, unsigned startBit,
+                     unsigned numBits) {
+  return insn.extractBitsAsZExtValue(numBits, startBit);
+}
+)";
 }
 
 // emitInsertBits - Emit the templated helper function insertBits().
 static void emitInsertBits(formatted_raw_ostream &OS) {
-  OS << "// Helper function for inserting bits extracted from an encoded "
-        "instruction into\n"
-     << "// a field.\n"
-     << "template <typename InsnType>\n"
-     << "static std::enable_if_t<std::is_integral<InsnType>::value>\n"
-     << "insertBits(InsnType &field, InsnType bits, unsigned startBit, "
-        "unsigned numBits) {\n"
-     << "  assert(startBit + numBits <= sizeof field * 8);\n"
-     << "  field |= (InsnType)bits << startBit;\n"
-     << "}\n"
-     << "\n"
-     << "template <typename InsnType>\n"
-     << "static std::enable_if_t<!std::is_integral<InsnType>::value>\n"
-     << "insertBits(InsnType &field, uint64_t bits, unsigned startBit, "
-        "unsigned numBits) {\n"
-     << "  field.insertBits(bits, startBit, numBits);\n"
-     << "}\n\n";
+  OS << R"(
+// Helper function for inserting bits extracted from an encoded instruction into
+// a field.
+template <typename InsnType>
+static std::enable_if_t<std::is_integral<InsnType>::value>
+insertBits(InsnType &field, InsnType bits, unsigned startBit, unsigned numBits) {
+  assert(startBit + numBits <= sizeof field * 8);
+  field |= (InsnType)bits << startBit;
+}
+
+template <typename InsnType>
+static std::enable_if_t<!std::is_integral<InsnType>::value>
+insertBits(InsnType &field, uint64_t bits, unsigned startBit, unsigned numBits) {
+  field.insertBits(bits, startBit, numBits);
+}
+)";
 }
 
 // emitDecodeInstruction - Emit the templated helper function
 // decodeInstruction().
 static void emitDecodeInstruction(formatted_raw_ostream &OS,
                                   bool IsVarLenInst) {
-  OS << "template <typename InsnType>\n"
-     << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], "
-        "MCInst &MI,\n"
-     << "                                      InsnType insn, uint64_t "
-        "Address,\n"
-     << "                                      const MCDisassembler *DisAsm,\n"
-     << "                                      const MCSubtargetInfo &STI";
+  OS << R"(
+template <typename InsnType>
+static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
+                                      InsnType insn, uint64_t Address,
+                                      const MCDisassembler *DisAsm,
+                                      const MCSubtargetInfo &STI)";
   if (IsVarLenInst) {
-    OS << ",\n"
-       << "                                      llvm::function_ref<void(APInt "
-          "&,"
-       << " uint64_t)> makeUp";
+    OS << ",\n                                      "
+          "llvm::function_ref<void(APInt &, uint64_t)> makeUp";
   }
-  OS << ") {\n"
-     << "  const FeatureBitset &Bits = STI.getFeatureBits();\n"
-     << "\n"
-     << "  const uint8_t *Ptr = DecodeTable;\n"
-     << "  uint64_t CurFieldValue = 0;\n"
-     << "  DecodeStatus S = MCDisassembler::Success;\n"
-     << "  while (true) {\n"
-     << "    ptrdiff_t Loc = Ptr - DecodeTable;\n"
-     << "    switch (*Ptr) {\n"
-     << "    default:\n"
-     << "      errs() << Loc << \": Unexpected decode table opcode!\\n\";\n"
-     << "      return MCDisassembler::Fail;\n"
-     << "    case MCD::OPC_ExtractField: {\n"
-     << "      // Decode the start value.\n"
-     << "      unsigned DecodedLen;\n"
-     << "      unsigned Start = decodeULEB128(++Ptr, &DecodedLen);\n"
-     << "      Ptr += DecodedLen;\n"
-     << "      unsigned Len = *Ptr++;\n";
+  OS << R"() {
+  const FeatureBitset &Bits = STI.getFeatureBits();
+
+  const uint8_t *Ptr = DecodeTable;
+  uint64_t CurFieldValue = 0;
+  DecodeStatus S = MCDisassembler::Success;
+  while (true) {
+    ptrdiff_t Loc = Ptr - DecodeTable;
+    switch (*Ptr) {
+    default:
+      errs() << Loc << ": Unexpected decode table opcode!\n";
+      return MCDisassembler::Fail;
+    case MCD::OPC_ExtractField: {
+      // Decode the start value.
+      unsigned DecodedLen;
+      unsigned Start = decodeULEB128(++Ptr, &DecodedLen);
+      Ptr += DecodedLen;
+      unsigned Len = *Ptr++;)";
   if (IsVarLenInst)
-    OS << "      makeUp(insn, Start + Len);\n";
-  OS << "      CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
-     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << "
-        "\", \"\n"
-     << "                   << Len << \"): \" << CurFieldValue << \"\\n\");\n"
-     << "      break;\n"
-     << "    }\n"
-     << "    case MCD::OPC_FilterValue: {\n"
-     << "      // Decode the field value.\n"
-     << "      unsigned Len;\n"
-     << "      uint64_t Val = decodeULEB128(++Ptr, &Len);\n"
-     << "      Ptr += Len;\n"
-     << "      // NumToSkip is a plain 24-bit integer.\n"
-     << "      unsigned NumToSkip = *Ptr++;\n"
-     << "      NumToSkip |= (*Ptr++) << 8;\n"
-     << "      NumToSkip |= (*Ptr++) << 16;\n"
-     << "\n"
-     << "      // Perform the filter operation.\n"
-     << "      if (Val != CurFieldValue)\n"
-     << "        Ptr += NumToSkip;\n"
-     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_FilterValue(\" << Val << "
-        "\", \" << NumToSkip\n"
-     << "                   << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" "
-        ": \"PASS:\")\n"
-     << "                   << \" continuing at \" << (Ptr - DecodeTable) << "
-        "\"\\n\");\n"
-     << "\n"
-     << "      break;\n"
-     << "    }\n"
-     << "    case MCD::OPC_CheckField: {\n"
-     << "      // Decode the start value.\n"
-     << "      unsigned Len;\n"
-     << "      unsigned Start = decodeULEB128(++Ptr, &Len);\n"
-     << "      Ptr += Len;\n"
-     << "      Len = *Ptr;\n";
+    OS << "\n      makeUp(insn, Start + Len);";
+  OS << R"(
+      CurFieldValue = fieldFromInstruction(insn, Start, Len);
+      LLVM_DEBUG(dbgs() << Loc << ": OPC_ExtractField(" << Start << ", "
+                   << Len << "): " << CurFieldValue << "\n");
+      break;
+    }
+    case MCD::OPC_FilterValue: {
+      // Decode the field value.
+      unsigned Len;
+      uint64_t Val = decodeULEB128(++Ptr, &Len);
+      Ptr += Len;
+      // NumToSkip is a plain 24-bit integer.
+      unsigned NumToSkip = *Ptr++;
+      NumToSkip |= (*Ptr++) << 8;
+      NumToSkip |= (*Ptr++) << 16;
+
+      // Perform the filter operation.
+      if (Val != CurFieldValue)
+        Ptr += NumToSkip;
+      LLVM_DEBUG(dbgs() << Loc << ": OPC_FilterValue(" << Val << ", " << NumToSkip
+                   << "): " << ((Val != CurFieldValue) ? "FAIL:" : "PASS:")
+                   << " continuing at " << (Ptr - DecodeTable) << "\n");
+
+      break;
+    }
+    case MCD::OPC_CheckField: {
+      // Decode the start value.
+      unsigned Len;
+      unsigned Start = decodeULEB128(++Ptr, &Len);
+      Ptr += Len;
+      Len = *Ptr;)";
   if (IsVarLenInst)
-    OS << "      makeUp(insn, Start + Len);\n";
-  OS << "      uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);\n"
-     << "      // Decode the field value.\n"
-     << "      unsigned PtrLen = 0;\n"
-     << "      uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);\n"
-     << "      Ptr += PtrLen;\n"
-     << "      // NumToSkip is a plain 24-bit integer.\n"
-     << "      unsigned NumToSkip = *Ptr++;\n"
-     << "      NumToSkip |= (*Ptr++) << 8;\n"
-     << "      NumToSkip |= (*Ptr++) << 16;\n"
-     << "\n"
-     << "      // If the actual and expected values don't match, skip.\n"
-     << "      if (ExpectedValue != FieldValue)\n"
-     << "        Ptr += NumToSkip;\n"
-     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << "
-        "\", \"\n"
-     << "                   << Len << \", \" << ExpectedValue << \", \" << "
-        "NumToSkip\n"
-     << "                   << \"): FieldValue = \" << FieldValue << \", "
-        "ExpectedValue = \"\n"
-     << "                   << ExpectedValue << \": \"\n"
-     << "                   << ((ExpectedValue == FieldValue) ? \"PASS\\n\" : "
-        "\"FAIL\\n\"));\n"
-     << "      break;\n"
-     << "    }\n"
-     << "    case MCD::OPC_CheckPredicate: {\n"
-     << "      unsigned Len;\n"
-     << "      // Decode the Predicate Index value.\n"
-     << "      unsigned PIdx = decodeULEB128(++Ptr, &Len);\n"
-     << "      Ptr += Len;\n"
-     << "      // NumToSkip is a plain 24-bit integer.\n"
-     << "      unsigned NumToSkip = *Ptr++;\n"
-     << "      NumToSkip |= (*Ptr++) << 8;\n"
-     << "      NumToSkip |= (*Ptr++) << 16;\n"
-     << "      // Check the predicate.\n"
-     << "      bool Pred;\n"
-     << "      if (!(Pred = checkDecoderPredicate(PIdx, Bits)))\n"
-     << "        Ptr += NumToSkip;\n"
-     << "      (void)Pred;\n"
-     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx "
-        "<< \"): \"\n"
-     << "            << (Pred ? \"PASS\\n\" : \"FAIL\\n\"));\n"
-     << "\n"
-     << "      break;\n"
-     << "    }\n"
-     << "    case MCD::OPC_Decode: {\n"
-     << "      unsigned Len;\n"
-     << "      // Decode the Opcode value.\n"
-     << "      unsigned Opc = decodeULEB128(++Ptr, &Len);\n"
-     << "      Ptr += Len;\n"
-     << "      unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n"
-     << "      Ptr += Len;\n"
-     << "\n"
-     << "      MI.clear();\n"
-     << "      MI.setOpcode(Opc);\n"
-     << "      bool DecodeComplete;\n";
+    OS << "\n      makeUp(insn, Start + Len);";
+  OS << R"(
+      uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
+      // Decode the field value.
+      unsigned PtrLen = 0;
+      uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
+      Ptr += PtrLen;
+      // NumToSkip is a plain 24-bit integer.
+      unsigned NumToSkip = *Ptr++;
+      NumToSkip |= (*Ptr++) << 8;
+      NumToSkip |= (*Ptr++) << 16;
+
+      // If the actual and expected values don't match, skip.
+      if (ExpectedValue != FieldValue)
+        Ptr += NumToSkip;
+      LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckField(" << Start << ", "
+                   << Len << ", " << ExpectedValue << ", " << NumToSkip
+                   << "): FieldValue = " << FieldValue << ", ExpectedValue = "
+                   << ExpectedValue << ": "
+                   << ((ExpectedValue == FieldValue) ? "PASS\n" : "FAIL\n"));
+      break;
+    }
+    case MCD::OPC_CheckPredicate: {
+      unsigned Len;
+      // Decode the Predicate Index value.
+      unsigned PIdx = decodeULEB128(++Ptr, &Len);
+      Ptr += Len;
+      // NumToSkip is a plain 24-bit integer.
+      unsigned NumToSkip = *Ptr++;
+      NumToSkip |= (*Ptr++) << 8;
+      NumToSkip |= (*Ptr++) << 16;
+      // Check the predicate.
+      bool Pred;
+      if (!(Pred = checkDecoderPredicate(PIdx, Bits)))
+        Ptr += NumToSkip;
+      (void)Pred;
+      LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckPredicate(" << PIdx << "): "
+            << (Pred ? "PASS\n" : "FAIL\n"));
+
+      break;
+    }
+    case MCD::OPC_Decode: {
+      unsigned Len;
+      // Decode the Opcode value.
+      unsigned Opc = decodeULEB128(++Ptr, &Len);
+      Ptr += Len;
+      unsigned DecodeIdx = decodeULEB128(Ptr, &Len);
+      Ptr += Len;
+
+      MI.clear();
+      MI.setOpcode(Opc);
+      bool DecodeComplete;)";
   if (IsVarLenInst) {
-    OS << "      Len = InstrLenTable[Opc];\n"
-       << "      makeUp(insn, Len);\n";
+    OS << "\n      Len = InstrLenTable[Opc];\n"
+       << "      makeUp(insn, Len);";
+  }
+  OS << R"(
+      S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, DecodeComplete);
+      assert(DecodeComplete);
+
+      LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
+                   << ", using decoder " << DecodeIdx << ": "
+                   << (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n");
+      return S;
+    }
+    case MCD::OPC_TryDecode: {
+      unsigned Len;
+      // Decode the Opcode value.
+      unsigned Opc = decodeULEB128(++Ptr, &Len);
+      Ptr += Len;
+      unsigned DecodeIdx = decodeULEB128(Ptr, &Len);
+      Ptr += Len;
+      // NumToSkip is a plain 24-bit integer.
+      unsigned NumToSkip = *Ptr++;
+      NumToSkip |= (*Ptr++) << 8;
+      NumToSkip |= (*Ptr++) << 16;
+
+      // Perform the decode operation.
+      MCInst TmpMI;
+      TmpMI.setOpcode(Opc);
+      bool DecodeComplete;
+      S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, DecodeComplete);
+      LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc
+                   << ", using decoder " << DecodeIdx << ": ");
+
+      if (DecodeComplete) {
+        // Decoding complete.
+        LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n");
+        MI = TmpMI;
+        return S;
+      } else {
+        assert(S == MCDisassembler::Fail);
+        // If the decoding was incomplete, skip.
+        Ptr += NumToSkip;
+        LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - DecodeTable) << "\n");
+        // Reset decode status. This also drops a SoftFail status that could be
+        // set before the decode attempt.
+        S = MCDisassembler::Success;
+      }
+      break;
+    }
+    case MCD::OPC_SoftFail: {
+      // Decode the mask values.
+      unsigned Len;
+      uint64_t PositiveMask = decodeULEB128(++Ptr, &Len);
+      Ptr += Len;
+      uint64_t NegativeMask = decodeULEB128(Ptr, &Len);
+      Ptr += Len;
+      bool Fail = (insn & PositiveMask) != 0 || (~insn & NegativeMask) != 0;
+      if (Fail)
+        S = MCDisassembler::SoftFail;
+      LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Fail ? "FAIL\n" : "PASS\n"));
+      break;
+    }
+    case MCD::OPC_Fail: {
+      LLVM_DEBUG(dbgs() << Loc << ": OPC_Fail\n");
+      return MCDisassembler::Fail;
+    }
+    }
   }
-  OS << "      S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, "
-        "DecodeComplete);\n"
-     << "      assert(DecodeComplete);\n"
-     << "\n"
-     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n"
-     << "                   << \", using decoder \" << DecodeIdx << \": \"\n"
-     << "                   << (S != MCDisassembler::Fail ? \"PASS\" : "
-        "\"FAIL\") << \"\\n\");\n"
-     << "      return S;\n"
-     << "    }\n"
-     << "    case MCD::OPC_TryDecode: {\n"
-     << "      unsigned Len;\n"
-     << "      // Decode the Opcode value.\n"
-     << "      unsigned Opc = decodeULEB128(++Ptr, &Len);\n"
-     << "      Ptr += Len;\n"
-     << "      unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n"
-     << "      Ptr += Len;\n"
-     << "      // NumToSkip is a plain 24-bit integer.\n"
-     << "      unsigned NumToSkip = *Ptr++;\n"
-     << "      NumToSkip |= (*Ptr++) << 8;\n"
-     << "      NumToSkip |= (*Ptr++) << 16;\n"
-     << "\n"
-     << "      // Perform the decode operation.\n"
-     << "      MCInst TmpMI;\n"
-     << "      TmpMI.setOpcode(Opc);\n"
-     << "      bool DecodeComplete;\n"
-     << "      S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, "
-        "DecodeComplete);\n"
-     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_TryDecode: opcode \" << "
-        "Opc\n"
-     << "                   << \", using decoder \" << DecodeIdx << \": \");\n"
-     << "\n"
-     << "      if (DecodeComplete) {\n"
-     << "        // Decoding complete.\n"
-     << "        LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? \"PASS\" : "
-        "\"FAIL\") << \"\\n\");\n"
-     << "        MI = TmpMI;\n"
-     << "        return S;\n"
-     << "      } else {\n"
-     << "        assert(S == MCDisassembler::Fail);\n"
-     << "        // If the decoding was incomplete, skip.\n"
-     << "        Ptr += NumToSkip;\n"
-     << "        LLVM_DEBUG(dbgs() << \"FAIL: continuing at \" << (Ptr - "
-        "DecodeTable) << \"\\n\");\n"
-     << "        // Reset decode status. This also drops a SoftFail status "
-        "that could be\n"
-     << "        // set before the decode attempt.\n"
-     << "        S = MCDisassembler::Success;\n"
-     << "      }\n"
-     << "      break;\n"
-     << "    }\n"
-     << "    case MCD::OPC_SoftFail: {\n"
-     << "      // Decode the mask values.\n"
-     << "      unsigned Len;\n"
-     << "      uint64_t PositiveMask = decodeULEB128(++Ptr, &Len);\n"
-     << "      Ptr += Len;\n"
-     << "      uint64_t NegativeMask = decodeULEB128(Ptr, &Len);\n"
-     << "      Ptr += Len;\n"
-     << "      bool Fail = (insn & PositiveMask) != 0 || (~insn & "
-        "NegativeMask) != 0;\n"
-     << "      if (Fail)\n"
-     << "        S = MCDisassembler::SoftFail;\n"
-     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? "
-        "\"FAIL\\n\" : \"PASS\\n\"));\n"
-     << "      break;\n"
-     << "    }\n"
-     << "    case MCD::OPC_Fail: {\n"
-     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n"
-     << "      return MCDisassembler::Fail;\n"
-     << "    }\n"
-     << "    }\n"
-     << "  }\n"
-     << "  llvm_unreachable(\"bogosity detected in disassembler state "
-        "machine!\");\n"
-     << "}\n\n";
+  llvm_unreachable("bogosity detected in disassembler state machine!");
+}
+
+)";
 }
 
 // Helper to propagate SoftFail status. Returns false if the status is Fail;
@@ -2443,10 +2419,13 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS,
 // is correct to propagate the values of this enum; see comment on 'enum
 // DecodeStatus'.)
 static void emitCheck(formatted_raw_ostream &OS) {
-  OS << "static bool Check(DecodeStatus &Out, DecodeStatus In) {\n"
-     << "  Out = static_cast<DecodeStatus>(Out & In);\n"
-     << "  return Out != MCDisassembler::Fail;\n"
-     << "}\n\n";
+  OS << R"(
+static bool Check(DecodeStatus &Out, DecodeStatus In) {
+  Out = static_cast<DecodeStatus>(Out & In);
+  return Out != MCDisassembler::Fail;
+}
+
+)";
 }
 
 // Collect all HwModes referenced by the target for encoding purposes,
@@ -2469,16 +2448,18 @@ collectHwModesReferencedForEncodings(const CodeGenHwModes &HWM,
 // Emits disassembler code for instruction decoding.
 void DecoderEmitter::run(raw_ostream &o) {
   formatted_raw_ostream OS(o);
-  OS << "#include \"llvm/MC/MCInst.h\"\n";
-  OS << "#include \"llvm/MC/MCSubtargetInfo.h\"\n";
-  OS << "#include \"llvm/Support/DataTypes.h\"\n";
-  OS << "#include \"llvm/Support/Debug.h\"\n";
-  OS << "#include \"llvm/Support/LEB128.h\"\n";
-  OS << "#include \"llvm/Support/raw_ostream.h\"\n";
-  OS << "#include \"llvm/TargetParser/SubtargetFeature.h\"\n";
-  OS << "#include <assert.h>\n";
-  OS << '\n';
-  OS << "namespace llvm {\n\n";
+  OS << R"(
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include <assert.h>
+
+namespace llvm {
+)";
 
   emitFieldFromInstruction(OS);
   emitInsertBits(OS);
@@ -2533,9 +2514,9 @@ void DecoderEmitter::run(raw_ostream &o) {
   bool IsVarLenInst = Target.hasVariableLengthEncodings();
   unsigned MaxInstLen = 0;
 
-  for (unsigned i = 0; i < NumberedEncodings.size(); ++i) {
-    const Record *EncodingDef = NumberedEncodings[i].EncodingDef;
-    const CodeGenInstruction *Inst = NumberedEncodings[i].Inst;
+  for (const auto &[NEI, NumberedEncoding] : enumerate(NumberedEncodings)) {
+    const Record *EncodingDef = NumberedEncoding.EncodingDef;
+    const CodeGenInstruction *Inst = NumberedEncoding.Inst;
     const Record *Def = Inst->TheDef;
     unsigned Size = EncodingDef->getValueAsInt("Size");
     if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
@@ -2546,7 +2527,7 @@ void DecoderEmitter::run(raw_ostream &o) {
       continue;
     }
 
-    if (i < NumberedInstructions.size())
+    if (NEI < NumberedInstructions.size())
       NumInstructions++;
     NumEncodings++;
 
@@ -2556,19 +2537,19 @@ void DecoderEmitter::run(raw_ostream &o) {
     if (IsVarLenInst)
       InstrLen.resize(NumberedInstructions.size(), 0);
 
-    if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, i,
+    if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, NEI,
                                            Operands, IsVarLenInst)) {
       if (IsVarLenInst) {
         MaxInstLen = std::max(MaxInstLen, Len);
-        InstrLen[i] = Len;
+        InstrLen[NEI] = Len;
       }
       std::string DecoderNamespace =
           std::string(EncodingDef->getValueAsString("DecoderNamespace"));
-      if (!NumberedEncodings[i].HwModeName.empty())
+      if (!NumberedEncoding.HwModeName.empty())
         DecoderNamespace +=
-            std::string("_") + NumberedEncodings[i].HwModeName.str();
+            std::string("_") + NumberedEncoding.HwModeName.str();
       OpcMap[std::pair(DecoderNamespace, Size)].emplace_back(
-          i, Target.getInstrIntValue(Def));
+          NEI, Target.getInstrIntValue(Def));
     } else {
       NumEncodingsOmitted++;
     }
-- 
cgit v1.2.3


From 498b7d2f86b4bceb381e66b093670c7ec4bc6cc4 Mon Sep 17 00:00:00 2001
From: Changpeng Fang <changpeng.fang@amd.com>
Date: Tue, 12 Mar 2024 14:02:20 -0700
Subject: AMDGPU: Copy TSFlags from Pseudo to DS_Real (#84977)

We need TSFalgs from pseudo to real.
---
 llvm/lib/Target/AMDGPU/DSInstructions.td | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index cc763df5a476..87ace01a6d0e 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -65,6 +65,7 @@ class DS_Real <DS_Pseudo ps, string opName = ps.Mnemonic> :
   let SubtargetPredicate = ps.SubtargetPredicate;
   let WaveSizePredicate  = ps.WaveSizePredicate;
   let OtherPredicates    = ps.OtherPredicates;
+  let TSFlags            = ps.TSFlags;
   let SchedRW            = ps.SchedRW;
   let mayLoad            = ps.mayLoad;
   let mayStore           = ps.mayStore;
-- 
cgit v1.2.3


From 1a6ec906fb3781c2fc98979ec37a2a76479b0b08 Mon Sep 17 00:00:00 2001
From: Daniel Paoliello <danpao@microsoft.com>
Date: Tue, 12 Mar 2024 14:10:49 -0700
Subject: [Arm64EC] Copy import descriptors to the EC Map (#84834)

As noted in <https://github.com/llvm/llvm-project/pull/78537>, MSVC
places import descriptors in both the EC and regular map - that PR moved
the descriptors to ONLY the regular map, however this causes linking
errors when linking as Arm64EC:

```
bcryptprimitives.lib(bcryptprimitives.dll) : error LNK2001: unresolved external symbol __IMPORT_DESCRIPTOR_bcryptprimitives (EC Symbol)
```

This change copies import descriptors from the regular map to the EC
map, which fixes this linking error.
---
 llvm/include/llvm/Object/COFFImportFile.h    |  6 ++++++
 llvm/lib/Object/ArchiveWriter.cpp            | 11 +++++++++++
 llvm/lib/Object/COFFImportFile.cpp           | 10 ++++------
 llvm/test/tools/llvm-dlltool/arm64ec.test    |  6 ++++++
 llvm/test/tools/llvm-lib/arm64ec-implib.test | 12 ++++++++++++
 5 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Object/COFFImportFile.h b/llvm/include/llvm/Object/COFFImportFile.h
index 402ded0d64fe..7268faa87eb7 100644
--- a/llvm/include/llvm/Object/COFFImportFile.h
+++ b/llvm/include/llvm/Object/COFFImportFile.h
@@ -26,6 +26,12 @@
 namespace llvm {
 namespace object {
 
+constexpr std::string_view ImportDescriptorPrefix = "__IMPORT_DESCRIPTOR_";
+constexpr std::string_view NullImportDescriptorSymbolName =
+    "__NULL_IMPORT_DESCRIPTOR";
+constexpr std::string_view NullThunkDataPrefix = "\x7f";
+constexpr std::string_view NullThunkDataSuffix = "_NULL_THUNK_DATA";
+
 class COFFImportFile : public SymbolicFile {
 private:
   enum SymbolIndex { ImpSymbol, ThunkSymbol, ECAuxSymbol, ECThunkSymbol };
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index be51093933a8..e0629747b40c 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -677,6 +677,13 @@ static bool isECObject(object::SymbolicFile &Obj) {
   return false;
 }
 
+bool isImportDescriptor(StringRef Name) {
+  return Name.starts_with(ImportDescriptorPrefix) ||
+         Name == StringRef{NullImportDescriptorSymbolName} ||
+         (Name.starts_with(NullThunkDataPrefix) &&
+          Name.ends_with(NullThunkDataSuffix));
+}
+
 static Expected<std::vector<unsigned>> getSymbols(SymbolicFile *Obj,
                                                   uint16_t Index,
                                                   raw_ostream &SymNames,
@@ -704,6 +711,10 @@ static Expected<std::vector<unsigned>> getSymbols(SymbolicFile *Obj,
       if (Map == &SymMap->Map) {
         Ret.push_back(SymNames.tell());
         SymNames << Name << '\0';
+        // If EC is enabled, then the import descriptors are NOT put into EC
+        // objects so we need to copy them to the EC map manually.
+        if (SymMap->UseECMap && isImportDescriptor(Name))
+          SymMap->ECMap[Name] = Index;
       }
     } else {
       Ret.push_back(SymNames.tell());
diff --git a/llvm/lib/Object/COFFImportFile.cpp b/llvm/lib/Object/COFFImportFile.cpp
index 376dd126baf6..46c8e702581e 100644
--- a/llvm/lib/Object/COFFImportFile.cpp
+++ b/llvm/lib/Object/COFFImportFile.cpp
@@ -108,7 +108,7 @@ template <class T> static void append(std::vector<uint8_t> &B, const T &Data) {
 }
 
 static void writeStringTable(std::vector<uint8_t> &B,
-                             ArrayRef<const std::string> Strings) {
+                             ArrayRef<const std::string_view> Strings) {
   // The COFF string table consists of a 4-byte value which is the size of the
   // table, including the length field itself.  This value is followed by the
   // string content itself, which is an array of null-terminated C-style
@@ -171,9 +171,6 @@ static Expected<std::string> replace(StringRef S, StringRef From,
   return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str();
 }
 
-static const std::string NullImportDescriptorSymbolName =
-    "__NULL_IMPORT_DESCRIPTOR";
-
 namespace {
 // This class constructs various small object files necessary to support linking
 // symbols imported from a DLL.  The contents are pretty strictly defined and
@@ -192,8 +189,9 @@ class ObjectFactory {
 public:
   ObjectFactory(StringRef S, MachineTypes M)
       : NativeMachine(M), ImportName(S), Library(llvm::sys::path::stem(S)),
-        ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()),
-        NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {}
+        ImportDescriptorSymbolName((ImportDescriptorPrefix + Library).str()),
+        NullThunkSymbolName(
+            (NullThunkDataPrefix + Library + NullThunkDataSuffix).str()) {}
 
   // Creates an Import Descriptor.  This is a small object file which contains a
   // reference to the terminators and contains the library name (entry) for the
diff --git a/llvm/test/tools/llvm-dlltool/arm64ec.test b/llvm/test/tools/llvm-dlltool/arm64ec.test
index e742a77ff78a..b03b4eaf7b2d 100644
--- a/llvm/test/tools/llvm-dlltool/arm64ec.test
+++ b/llvm/test/tools/llvm-dlltool/arm64ec.test
@@ -12,9 +12,12 @@ ARMAP-NEXT: test_NULL_THUNK_DATA in test.dll
 ARMAP-EMPTY:
 ARMAP-NEXT: Archive EC map
 ARMAP-NEXT: #func in test.dll
+ARMAP-NEXT: __IMPORT_DESCRIPTOR_test in test.dll
+ARMAP-NEXT: __NULL_IMPORT_DESCRIPTOR in test.dll
 ARMAP-NEXT: __imp_aux_func in test.dll
 ARMAP-NEXT: __imp_func in test.dll
 ARMAP-NEXT: func in test.dll
+ARMAP-NEXT: test_NULL_THUNK_DATA in test.dll
 
 RUN: llvm-dlltool -m arm64ec -d test.def -N test2.def -l test2.lib
 RUN: llvm-nm --print-armap test2.lib | FileCheck --check-prefix=ARMAP2 %s
@@ -28,9 +31,12 @@ ARMAP2-NEXT: test_NULL_THUNK_DATA in test.dll
 ARMAP2-EMPTY:
 ARMAP2-NEXT: Archive EC map
 ARMAP2-NEXT: #func in test.dll
+ARMAP2-NEXT: __IMPORT_DESCRIPTOR_test in test.dll
+ARMAP2-NEXT: __NULL_IMPORT_DESCRIPTOR in test.dll
 ARMAP2-NEXT: __imp_aux_func in test.dll
 ARMAP2-NEXT: __imp_func in test.dll
 ARMAP2-NEXT: func in test.dll
+ARMAP2-NEXT: test_NULL_THUNK_DATA in test.dll
 
 RUN: llvm-dlltool -m arm64ec -d test.def --input-native-def test2.def -l test3.lib
 RUN: llvm-nm --print-armap test3.lib | FileCheck --check-prefix=ARMAP2 %s
diff --git a/llvm/test/tools/llvm-lib/arm64ec-implib.test b/llvm/test/tools/llvm-lib/arm64ec-implib.test
index 77bdc23589fd..00eddd2a4752 100644
--- a/llvm/test/tools/llvm-lib/arm64ec-implib.test
+++ b/llvm/test/tools/llvm-lib/arm64ec-implib.test
@@ -16,6 +16,8 @@ ARMAP-NEXT: #funcexp in test.dll
 ARMAP-NEXT: #mangledfunc in test.dll
 ARMAP-NEXT: ?test_cpp_func@@$$hYAHPEAX@Z in test.dll
 ARMAP-NEXT: ?test_cpp_func@@YAHPEAX@Z in test.dll
+ARMAP-NEXT: __IMPORT_DESCRIPTOR_test in test.dll
+ARMAP-NEXT: __NULL_IMPORT_DESCRIPTOR in test.dll
 ARMAP-NEXT: __imp_?test_cpp_func@@YAHPEAX@Z in test.dll
 ARMAP-NEXT: __imp_aux_?test_cpp_func@@YAHPEAX@Z in test.dll
 ARMAP-NEXT: __imp_aux_expname in test.dll
@@ -28,6 +30,7 @@ ARMAP-NEXT: __imp_mangledfunc in test.dll
 ARMAP-NEXT: expname in test.dll
 ARMAP-NEXT: funcexp in test.dll
 ARMAP-NEXT: mangledfunc in test.dll
+ARMAP-NEXT: test_NULL_THUNK_DATA in test.dll
 
 RUN: llvm-readobj test.lib | FileCheck -check-prefix=READOBJ %s
 
@@ -122,6 +125,8 @@ ARMAPX-NEXT: #funcexp in test.dll
 ARMAPX-NEXT: #mangledfunc in test.dll
 ARMAPX-NEXT: ?test_cpp_func@@$$hYAHPEAX@Z in test.dll
 ARMAPX-NEXT: ?test_cpp_func@@YAHPEAX@Z in test.dll
+ARMAPX-NEXT: __IMPORT_DESCRIPTOR_test in test.dll
+ARMAPX-NEXT: __NULL_IMPORT_DESCRIPTOR in test.dll
 ARMAPX-NEXT: __imp_?test_cpp_func@@YAHPEAX@Z in test.dll
 ARMAPX-NEXT: __imp_aux_?test_cpp_func@@YAHPEAX@Z in test.dll
 ARMAPX-NEXT: __imp_aux_expname in test.dll
@@ -134,6 +139,7 @@ ARMAPX-NEXT: __imp_mangledfunc in test.dll
 ARMAPX-NEXT: expname in test.dll
 ARMAPX-NEXT: funcexp in test.dll
 ARMAPX-NEXT: mangledfunc in test.dll
+ARMAPX-NEXT: test_NULL_THUNK_DATA in test.dll
 
 RUN: llvm-readobj testx.lib | FileCheck -check-prefix=READOBJX %s
 
@@ -255,6 +261,8 @@ ARMAPX2-NEXT: #funcexp in test2.dll
 ARMAPX2-NEXT: #mangledfunc in test2.dll
 ARMAPX2-NEXT: ?test_cpp_func@@$$hYAHPEAX@Z in test2.dll
 ARMAPX2-NEXT: ?test_cpp_func@@YAHPEAX@Z in test2.dll
+ARMAPX2-NEXT: __IMPORT_DESCRIPTOR_test2 in test2.dll
+ARMAPX2-NEXT: __NULL_IMPORT_DESCRIPTOR in test2.dll
 ARMAPX2-NEXT: __imp_?test_cpp_func@@YAHPEAX@Z in test2.dll
 ARMAPX2-NEXT: __imp_aux_?test_cpp_func@@YAHPEAX@Z in test2.dll
 ARMAPX2-NEXT: __imp_aux_expname in test2.dll
@@ -267,6 +275,7 @@ ARMAPX2-NEXT: __imp_mangledfunc in test2.dll
 ARMAPX2-NEXT: expname in test2.dll
 ARMAPX2-NEXT: funcexp in test2.dll
 ARMAPX2-NEXT: mangledfunc in test2.dll
+ARMAPX2-NEXT: test2_NULL_THUNK_DATA in test2.dll
 
 ARMAPX2:      test2.dll:
 ARMAPX2:      00000000 T #funcexp
@@ -309,6 +318,8 @@ EXPAS-ARMAP-NEXT: #func1 in test.dll
 EXPAS-ARMAP-NEXT: #func2 in test.dll
 EXPAS-ARMAP-NEXT: #func3 in test.dll
 EXPAS-ARMAP-NEXT: #func4 in test.dll
+EXPAS-ARMAP-NEXT: __IMPORT_DESCRIPTOR_test in test.dll
+EXPAS-ARMAP-NEXT: __NULL_IMPORT_DESCRIPTOR in test.dll
 EXPAS-ARMAP-NEXT: __imp_aux_func1 in test.dll
 EXPAS-ARMAP-NEXT: __imp_aux_func2 in test.dll
 EXPAS-ARMAP-NEXT: __imp_aux_func3 in test.dll
@@ -323,6 +334,7 @@ EXPAS-ARMAP-NEXT: func1 in test.dll
 EXPAS-ARMAP-NEXT: func2 in test.dll
 EXPAS-ARMAP-NEXT: func3 in test.dll
 EXPAS-ARMAP-NEXT: func4 in test.dll
+EXPAS-ARMAP-NEXT: test_NULL_THUNK_DATA in test.dll
 
 EXPAS-READOBJ:      File: test.dll
 EXPAS-READOBJ-NEXT: Format: COFF-import-file-ARM64EC
-- 
cgit v1.2.3


From d73c2d5df21735805a1f46a85790db64c0615e1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= <stefan.graenitz@gmail.com>
Date: Tue, 12 Mar 2024 23:12:46 +0100
Subject: Fix unittest after #84460: only applicable if the platform supports
 JIT

---
 .../Interpreter/InterpreterExtensionsTest.cpp      | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/clang/unittests/Interpreter/InterpreterExtensionsTest.cpp b/clang/unittests/Interpreter/InterpreterExtensionsTest.cpp
index f1c3d65ab0a9..77fd1b4e1981 100644
--- a/clang/unittests/Interpreter/InterpreterExtensionsTest.cpp
+++ b/clang/unittests/Interpreter/InterpreterExtensionsTest.cpp
@@ -17,7 +17,9 @@
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/Sema.h"
 
+#include "llvm/ExecutionEngine/Orc/LLJIT.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/TargetSelect.h"
 #include "llvm/Testing/Support/Error.h"
 
 #include "gmock/gmock.h"
@@ -27,6 +29,22 @@
 using namespace clang;
 namespace {
 
+static bool HostSupportsJit() {
+  auto J = llvm::orc::LLJITBuilder().create();
+  if (J)
+    return true;
+  LLVMConsumeError(llvm::wrap(J.takeError()));
+  return false;
+}
+
+struct LLVMInitRAII {
+  LLVMInitRAII() {
+    llvm::InitializeNativeTarget();
+    llvm::InitializeNativeTargetAsmPrinter();
+  }
+  ~LLVMInitRAII() { llvm::llvm_shutdown(); }
+} LLVMInit;
+
 class TestCreateResetExecutor : public Interpreter {
 public:
   TestCreateResetExecutor(std::unique_ptr<CompilerInstance> CI,
@@ -39,6 +57,10 @@ public:
 };
 
 TEST(InterpreterExtensionsTest, ExecutorCreateReset) {
+  // Make sure we can create the executer on the platform.
+  if (!HostSupportsJit())
+    GTEST_SKIP();
+
   clang::IncrementalCompilerBuilder CB;
   llvm::Error ErrOut = llvm::Error::success();
   TestCreateResetExecutor Interp(cantFail(CB.CreateCpp()), ErrOut);
-- 
cgit v1.2.3


From 76f3a084e77991cffbb8108959457ffd75f8e9c8 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph@gmail.com>
Date: Tue, 12 Mar 2024 15:18:47 -0700
Subject: Update GettingStarted.rst doc with negative refspec to filter user
 branches (#75015)

This allows to keep fetching release branches as well.
---
 llvm/docs/GettingStarted.rst | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/llvm/docs/GettingStarted.rst b/llvm/docs/GettingStarted.rst
index 705f6427d9ed..7ecef78c405b 100644
--- a/llvm/docs/GettingStarted.rst
+++ b/llvm/docs/GettingStarted.rst
@@ -42,10 +42,14 @@ Getting the Source Code and Building LLVM
 
      ``git clone --depth 1 https://github.com/llvm/llvm-project.git``
 
-   * You are likely only interested in the main branch moving forward, if
-     you don't want `git fetch` (or `git pull`) to download user branches, use:
+   * You are likely not interested in the user branches in the repo (used for
+     stacked pull-requests and reverts), you can filter them from your
+     `git fetch` (or `git pull`) with this configuration:
 
-     ``sed 's#fetch = +refs/heads/\*:refs/remotes/origin/\*#fetch = +refs/heads/main:refs/remotes/origin/main#' -i llvm-project/.git/config``
+.. code-block:: console
+
+  git config --add remote.origin.fetch '^refs/heads/users/*'
+  git config --add remote.origin.fetch '^refs/heads/revert-*'
 
 #. Configure and build LLVM and Clang:
 
-- 
cgit v1.2.3


From beb47e78be6a819b6501f99302c1c4c1ae84b90e Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12@gmail.com>
Date: Tue, 12 Mar 2024 22:19:09 +0000
Subject: [clang][CodeCompletion] Allow debuggers to code-complete reserved
 identifiers (#84891)

---
 clang/lib/Sema/SemaCodeComplete.cpp       | 4 ++++
 clang/test/CodeCompletion/ordinary-name.c | 7 ++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 8d7523900940..73e6baa52782 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -764,6 +764,10 @@ getRequiredQualification(ASTContext &Context, const DeclContext *CurContext,
 // Filter out names reserved for the implementation if they come from a
 // system header.
 static bool shouldIgnoreDueToReservedName(const NamedDecl *ND, Sema &SemaRef) {
+  // Debuggers want access to all identifiers, including reserved ones.
+  if (SemaRef.getLangOpts().DebuggerSupport)
+    return false;
+
   ReservedIdentifierStatus Status = ND->isReserved(SemaRef.getLangOpts());
   // Ignore reserved names for compiler provided decls.
   if (isReservedInAllContexts(Status) && ND->getLocation().isInvalid())
diff --git a/clang/test/CodeCompletion/ordinary-name.c b/clang/test/CodeCompletion/ordinary-name.c
index c8181a248daa..939856192061 100644
--- a/clang/test/CodeCompletion/ordinary-name.c
+++ b/clang/test/CodeCompletion/ordinary-name.c
@@ -5,6 +5,7 @@ typedef struct t _TYPEDEF;
 void foo() {
   int y;
   // RUN: %clang_cc1 -isystem %S/Inputs -fsyntax-only -code-completion-at=%s:%(line-1):9 %s -o - | FileCheck -check-prefix=CHECK-CC1 %s
+  // CHECK-CC1-NOT: __builtin_va_list
   // CHECK-CC1-NOT: __INTEGER_TYPE
   // CHECK-CC1: _Imaginary
   // CHECK-CC1: _MyPrivateType
@@ -15,4 +16,8 @@ void foo() {
   // CHECK-CC1: y
 
   // PR8744
-  // RUN: %clang_cc1 -isystem %S/Inputs -fsyntax-only -code-completion-at=%s:%(line-17):11 %s
+  // RUN: %clang_cc1 -isystem %S/Inputs -fsyntax-only -code-completion-at=%s:%(line-18):11 %s
+  
+  // RUN: %clang_cc1 -isystem %S/Inputs -fsyntax-only -fdebugger-support -code-completion-at=%s:%(line-15):9 %s -o - | FileCheck -check-prefix=CHECK-DBG %s
+  // CHECK-DBG: __builtin_va_list
+  // CHECK-DBG: __INTEGER_TYPE
-- 
cgit v1.2.3


From 88bf64097e453deca73c91ec7de1af7eebe296a9 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12@gmail.com>
Date: Tue, 12 Mar 2024 22:19:27 +0000
Subject: [lldb][test] TestExprCompletion.py: add tests for completion of
 reserved identifiers (#84890)

---
 lldb/test/API/commands/expression/completion/Makefile             | 1 +
 .../test/API/commands/expression/completion/TestExprCompletion.py | 5 +++++
 lldb/test/API/commands/expression/completion/main.cpp             | 5 +++++
 lldb/test/API/commands/expression/completion/sys/reserved.h       | 8 ++++++++
 4 files changed, 19 insertions(+)
 create mode 100644 lldb/test/API/commands/expression/completion/sys/reserved.h

diff --git a/lldb/test/API/commands/expression/completion/Makefile b/lldb/test/API/commands/expression/completion/Makefile
index 020dce7c31d1..9882622b2189 100644
--- a/lldb/test/API/commands/expression/completion/Makefile
+++ b/lldb/test/API/commands/expression/completion/Makefile
@@ -1,3 +1,4 @@
 CXX_SOURCES := main.cpp other.cpp
+CXXFLAGS += -isystem $(SRCDIR)/sys
 
 include Makefile.rules
diff --git a/lldb/test/API/commands/expression/completion/TestExprCompletion.py b/lldb/test/API/commands/expression/completion/TestExprCompletion.py
index c6a1e3c0f422..022b9436ee8e 100644
--- a/lldb/test/API/commands/expression/completion/TestExprCompletion.py
+++ b/lldb/test/API/commands/expression/completion/TestExprCompletion.py
@@ -246,6 +246,11 @@ class CommandLineExprCompletionTestCase(TestBase):
             "expr some_expr.Self(). FooNoArgs", "expr some_expr.Self(). FooNoArgsBar()"
         )
 
+        self.complete_from_to("expr myVec.__f", "expr myVec.__func()")
+        self.complete_from_to("expr myVec._F", "expr myVec._Func()")
+        self.complete_from_to("expr myVec.__m", "expr myVec.__mem")
+        self.complete_from_to("expr myVec._M", "expr myVec._Mem")
+
     def test_expr_completion_with_descriptions(self):
         self.build()
         self.main_source = "main.cpp"
diff --git a/lldb/test/API/commands/expression/completion/main.cpp b/lldb/test/API/commands/expression/completion/main.cpp
index 908bebbebff5..5e03805a7a4d 100644
--- a/lldb/test/API/commands/expression/completion/main.cpp
+++ b/lldb/test/API/commands/expression/completion/main.cpp
@@ -1,3 +1,5 @@
+#include <reserved.h>
+
 namespace LongNamespaceName { class NestedClass { long m; }; }
 
 // Defined in other.cpp, we only have a forward declaration here.
@@ -31,5 +33,8 @@ int main()
     some_expr.FooNumbersBar1();
     Expr::StaticMemberMethodBar();
     ForwardDecl *fwd_decl_ptr = &fwd_decl;
+    MyVec myVec;
+    myVec.__func();
+    myVec._Func();
     return 0; // Break here
 }
diff --git a/lldb/test/API/commands/expression/completion/sys/reserved.h b/lldb/test/API/commands/expression/completion/sys/reserved.h
new file mode 100644
index 000000000000..0ce10ebec62b
--- /dev/null
+++ b/lldb/test/API/commands/expression/completion/sys/reserved.h
@@ -0,0 +1,8 @@
+class MyVec {
+  int __mem;
+  int _Mem;
+
+public:
+  void __func() {}
+  void _Func() {}
+};
-- 
cgit v1.2.3


From 47625e47db1d8fef6936ef48103e9aeb1fa3d328 Mon Sep 17 00:00:00 2001
From: Dave Clausen <daveclausen@gmail.com>
Date: Tue, 12 Mar 2024 18:36:48 -0400
Subject: Fix race in the implementation of __tsan_acquire() (#84923)

`__tsan::Acquire()`, which is called by `__tsan_acquire()`, has a
performance optimization which attempts to avoid acquiring the atomic
variable's mutex if the variable has no associated memory model state.

However, if the atomic variable was recently written to by a
`compare_exchange_weak/strong` on another thread, the memory model state
may be created *after* the atomic variable is updated. This is a data
race, and can cause the thread calling `Acquire()` to not realize that
the atomic variable was previously written to by another thread.

Specifically, if you have code that writes to an atomic variable using
`compare_exchange_weak/strong`, and then in another thread you read the
value using a relaxed load, followed by an
`atomic_thread_fence(memory_order_acquire)`, followed by a call to
`__tsan_acquire()`, TSAN may not realize that the store happened before
the fence, and so it will complain about any other variables you access
from both threads if the thread-safety of those accesses depended on the
happens-before relationship between the store and the fence.

This change eliminates the unsafe optimization in `Acquire()`. Now,
`Acquire()` acquires the mutex before checking for the existence of the
memory model state.
---
 compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp        |  2 +-
 .../test/tsan/compare_exchange_acquire_fence.cpp   | 43 ++++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 compiler-rt/test/tsan/compare_exchange_acquire_fence.cpp

diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
index 2e978852ea7d..2a8aa1915c9a 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
@@ -446,9 +446,9 @@ void Acquire(ThreadState *thr, uptr pc, uptr addr) {
   if (!s)
     return;
   SlotLocker locker(thr);
+  ReadLock lock(&s->mtx);
   if (!s->clock)
     return;
-  ReadLock lock(&s->mtx);
   thr->clock.Acquire(s->clock);
 }
 
diff --git a/compiler-rt/test/tsan/compare_exchange_acquire_fence.cpp b/compiler-rt/test/tsan/compare_exchange_acquire_fence.cpp
new file mode 100644
index 000000000000..b9fd0c5ad21f
--- /dev/null
+++ b/compiler-rt/test/tsan/compare_exchange_acquire_fence.cpp
@@ -0,0 +1,43 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1
+// This is a correct program and tsan should not report a race.
+//
+// Verify that there is a happens-before relationship between a
+// memory_order_release store that happens as part of a successful
+// compare_exchange_strong(), and an atomic_thread_fence(memory_order_acquire)
+// that happens after a relaxed load.
+
+#include <atomic>
+#include <sanitizer/tsan_interface.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <thread>
+
+std::atomic<bool> a;
+unsigned int b;
+constexpr int loops = 100000;
+
+void Thread1() {
+  for (int i = 0; i < loops; ++i) {
+    while (a.load(std::memory_order_acquire)) {
+    }
+    b = i;
+    bool expected = false;
+    a.compare_exchange_strong(expected, true, std::memory_order_acq_rel);
+  }
+}
+
+int main() {
+  std::thread t(Thread1);
+  unsigned int sum = 0;
+  for (int i = 0; i < loops; ++i) {
+    while (!a.load(std::memory_order_relaxed)) {
+    }
+    std::atomic_thread_fence(std::memory_order_acquire);
+    __tsan_acquire(&a);
+    sum += b;
+    a.store(false, std::memory_order_release);
+  }
+  t.join();
+  fprintf(stderr, "DONE: %u\n", sum);
+  return 0;
+}
-- 
cgit v1.2.3


From fd32e744a58fe61b4bd6acfa1d501bc1d6c1d96f Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks@fb.com>
Date: Tue, 12 Mar 2024 15:52:27 -0700
Subject: [BOLT] Add support for Linux kernel PCI fixup section (#84982)

.pci_fixup section contains a table with entries allowing to invoke a
fixup hook whenever a problem is encountered with a PCI device. The
hookup code typically points to the start of a function. As we are not
relocating functions in the kernel (at least not yet), verify this
assumption while reading the table and ignore any functions with a fixup
code in the middle.
---
 bolt/lib/Rewrite/LinuxKernelRewriter.cpp | 126 +++++++++++++++++++++++--------
 bolt/test/X86/linux-pci-fixup.s          |  41 ++++++++++
 2 files changed, 134 insertions(+), 33 deletions(-)
 create mode 100644 bolt/test/X86/linux-pci-fixup.s

diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
index 331a61e7c3c2..a2bfd45a64e3 100644
--- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
+++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -55,6 +55,11 @@ static cl::opt<bool> DumpParavirtualPatchSites(
     "dump-para-sites", cl::desc("dump Linux kernel paravitual patch sites"),
     cl::init(false), cl::Hidden, cl::cat(BoltCategory));
 
+static cl::opt<bool>
+    DumpPCIFixups("dump-pci-fixups",
+                  cl::desc("dump Linux kernel PCI fixup table"),
+                  cl::init(false), cl::Hidden, cl::cat(BoltCategory));
+
 static cl::opt<bool> DumpStaticCalls("dump-static-calls",
                                      cl::desc("dump Linux kernel static calls"),
                                      cl::init(false), cl::Hidden,
@@ -181,6 +186,10 @@ class LinuxKernelRewriter final : public MetadataRewriter {
   /// Size of bug_entry struct.
   static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12;
 
+  /// .pci_fixup section.
+  ErrorOr<BinarySection &> PCIFixupSection = std::errc::bad_address;
+  static constexpr size_t PCI_FIXUP_ENTRY_SIZE = 16;
+
   /// Insert an LKMarker for a given code pointer \p PC from a non-code section
   /// \p SectionName.
   void insertLKMarker(uint64_t PC, uint64_t SectionOffset,
@@ -190,9 +199,6 @@ class LinuxKernelRewriter final : public MetadataRewriter {
   /// Process linux kernel special sections and their relocations.
   void processLKSections();
 
-  /// Process special linux kernel section, .pci_fixup.
-  void processLKPCIFixup();
-
   /// Process __ksymtab and __ksymtab_gpl.
   void processLKKSymtab(bool IsGPL = false);
 
@@ -226,6 +232,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
   /// Read alternative instruction info from .altinstructions.
   Error readAltInstructions();
 
+  /// Read .pci_fixup
+  Error readPCIFixupTable();
+
   /// Mark instructions referenced by kernel metadata.
   Error markInstructions();
 
@@ -256,6 +265,9 @@ public:
     if (Error E = readAltInstructions())
       return E;
 
+    if (Error E = readPCIFixupTable())
+      return E;
+
     return Error::success();
   }
 
@@ -318,41 +330,11 @@ void LinuxKernelRewriter::insertLKMarker(uint64_t PC, uint64_t SectionOffset,
 }
 
 void LinuxKernelRewriter::processLKSections() {
-  processLKPCIFixup();
   processLKKSymtab();
   processLKKSymtab(true);
   processLKSMPLocks();
 }
 
-/// Process .pci_fixup section of Linux Kernel.
-/// This section contains a list of entries for different PCI devices and their
-/// corresponding hook handler (code pointer where the fixup
-/// code resides, usually on x86_64 it is an entry PC relative 32 bit offset).
-/// Documentation is in include/linux/pci.h.
-void LinuxKernelRewriter::processLKPCIFixup() {
-  ErrorOr<BinarySection &> SectionOrError =
-      BC.getUniqueSectionByName(".pci_fixup");
-  if (!SectionOrError)
-    return;
-
-  const uint64_t SectionSize = SectionOrError->getSize();
-  const uint64_t SectionAddress = SectionOrError->getAddress();
-  assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16");
-
-  for (uint64_t I = 12; I + 4 <= SectionSize; I += 16) {
-    const uint64_t PC = SectionAddress + I;
-    ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(PC, 4);
-    assert(Offset && "cannot read value from .pci_fixup");
-    const int32_t SignedOffset = *Offset;
-    const uint64_t HookupAddress = PC + SignedOffset;
-    BinaryFunction *HookupFunction =
-        BC.getBinaryFunctionAtAddress(HookupAddress);
-    assert(HookupFunction && "expected function for entry in .pci_fixup");
-    BC.addRelocation(PC, HookupFunction->getSymbol(), Relocation::getPC32(), 0,
-                     *Offset);
-  }
-}
-
 /// Process __ksymtab[_gpl] sections of Linux Kernel.
 /// This section lists all the vmlinux symbols that kernel modules can access.
 ///
@@ -1283,6 +1265,84 @@ Error LinuxKernelRewriter::readAltInstructions() {
   return Error::success();
 }
 
+/// When the Linux kernel needs to handle an error associated with a given PCI
+/// device, it uses a table stored in .pci_fixup section to locate a fixup code
+/// specific to the vendor and the problematic device. The section contains a
+/// list of the following structures defined in include/linux/pci.h:
+///
+///   struct pci_fixup {
+///     u16 vendor;     /* Or PCI_ANY_ID */
+///     u16 device;     /* Or PCI_ANY_ID */
+///     u32 class;      /* Or PCI_ANY_ID */
+///     unsigned int class_shift; /* should be 0, 8, 16 */
+///     int hook_offset;
+///   };
+///
+/// Normally, the hook will point to a function start and we don't have to
+/// update the pointer if we are not relocating functions. Hence, while reading
+/// the table we validate this assumption. If a function has a fixup code in the
+/// middle of its body, we issue a warning and ignore it.
+Error LinuxKernelRewriter::readPCIFixupTable() {
+  PCIFixupSection = BC.getUniqueSectionByName(".pci_fixup");
+  if (!PCIFixupSection)
+    return Error::success();
+
+  if (PCIFixupSection->getSize() % PCI_FIXUP_ENTRY_SIZE)
+    return createStringError(errc::executable_format_error,
+                             "PCI fixup table size error");
+
+  const uint64_t Address = PCIFixupSection->getAddress();
+  DataExtractor DE = DataExtractor(PCIFixupSection->getContents(),
+                                   BC.AsmInfo->isLittleEndian(),
+                                   BC.AsmInfo->getCodePointerSize());
+  uint64_t EntryID = 0;
+  DataExtractor::Cursor Cursor(0);
+  while (Cursor && !DE.eof(Cursor)) {
+    const uint16_t Vendor = DE.getU16(Cursor);
+    const uint16_t Device = DE.getU16(Cursor);
+    const uint32_t Class = DE.getU32(Cursor);
+    const uint32_t ClassShift = DE.getU32(Cursor);
+    const uint64_t HookAddress =
+        Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
+
+    if (!Cursor)
+      return createStringError(errc::executable_format_error,
+                               "out of bounds while reading .pci_fixup: %s",
+                               toString(Cursor.takeError()).c_str());
+
+    ++EntryID;
+
+    if (opts::DumpPCIFixups) {
+      BC.outs() << "PCI fixup entry: " << EntryID << "\n\tVendor       0x"
+                << Twine::utohexstr(Vendor) << "\n\tDevice:      0x"
+                << Twine::utohexstr(Device) << "\n\tClass:       0x"
+                << Twine::utohexstr(Class) << "\n\tClassShift:  0x"
+                << Twine::utohexstr(ClassShift) << "\n\tHookAddress: 0x"
+                << Twine::utohexstr(HookAddress) << '\n';
+    }
+
+    BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(HookAddress);
+    if (!BF && opts::Verbosity) {
+      BC.outs() << "BOLT-INFO: no function matches address 0x"
+                << Twine::utohexstr(HookAddress)
+                << " of hook from .pci_fixup\n";
+    }
+
+    if (!BF || !BC.shouldEmit(*BF))
+      continue;
+
+    if (const uint64_t Offset = HookAddress - BF->getAddress()) {
+      BC.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function "
+                << *BF << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
+      BF->setSimple(false);
+    }
+  }
+
+  BC.outs() << "BOLT-INFO: parsed " << EntryID << " PCI fixup entries\n";
+
+  return Error::success();
+}
+
 } // namespace
 
 std::unique_ptr<MetadataRewriter>
diff --git a/bolt/test/X86/linux-pci-fixup.s b/bolt/test/X86/linux-pci-fixup.s
new file mode 100644
index 000000000000..a574ba84c4df
--- /dev/null
+++ b/bolt/test/X86/linux-pci-fixup.s
@@ -0,0 +1,41 @@
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
+# RUN:   -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
+# RUN: llvm-bolt %t.exe --print-normalized -o %t.out |& FileCheck %s
+
+## Check that BOLT correctly parses the Linux kernel .pci_fixup section and
+## verify that PCI fixup hook in the middle of a function is detected.
+
+# CHECK:      BOLT-INFO: Linux kernel binary detected
+# CHECK:      BOLT-WARNING: PCI fixup detected in the middle of function _start
+# CHECK:      BOLT-INFO: parsed 2 PCI fixup entries
+
+  .text
+  .globl _start
+  .type _start, %function
+_start:
+  nop
+.L0:
+  ret
+  .size _start, .-_start
+
+## PCI fixup table.
+  .section .pci_fixup,"a",@progbits
+
+  .short 0x8086     # vendor
+  .short 0xbeef     # device
+  .long 0xffffffff  # class
+  .long 0x0         # class shift
+  .long _start - .  # fixup
+
+  .short 0x8086     # vendor
+  .short 0xbad      # device
+  .long 0xffffffff  # class
+  .long 0x0         # class shift
+  .long .L0 - .     # fixup
+
+## Fake Linux Kernel sections.
+  .section __ksymtab,"a",@progbits
+  .section __ksymtab_gpl,"a",@progbits
-- 
cgit v1.2.3


From 422d240dc9b4b36f505c43e6fe650af4f4cf4f98 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Tue, 12 Mar 2024 16:10:38 -0700
Subject: Relax tests to also work with newer versions of lldb.

- result variables are optional
- static members may print their values
- public/protected shows up in ptype output
---
 .../debuginfo-tests/llgdb-tests/static-member-2.cpp            | 10 +++++-----
 .../debuginfo-tests/llgdb-tests/static-member.cpp              |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp
index 5b6647c0631c..c9b416dace92 100644
--- a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp
+++ b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp
@@ -4,19 +4,19 @@
 // XFAIL: gdb-clang-incompatibility
 
 // DEBUGGER: delete breakpoints
-// DEBUGGER: break static-member.cpp:33
+// DEBUGGER: break static-member-2.cpp:36
 // DEBUGGER: r
 // DEBUGGER: ptype C
 // CHECK:      {{struct|class}} C {
-// CHECK:      static const int a;
+// CHECK:      static const int a
 // CHECK-NEXT: static int b;
 // CHECK-NEXT: static int c;
-// CHECK-NEXT: int d;
+// CHECK:      int d;
 // CHECK-NEXT: }
 // DEBUGGER: p C::a
-// CHECK: ${{[0-9]}} = 4
+// CHECK:  4
 // DEBUGGER: p C::c
-// CHECK: ${{[0-9]}} = 15
+// CHECK: 15
 
 // PR14471, PR14734
 
diff --git a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp
index 29dd84dc8325..492e0ca08420 100644
--- a/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp
+++ b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp
@@ -3,14 +3,14 @@
 // RUN: %test_debuginfo %s %t.out
 // XFAIL: !system-darwin && gdb-clang-incompatibility
 // DEBUGGER: delete breakpoints
-// DEBUGGER: break static-member.cpp:33
+// DEBUGGER: break static-member.cpp:35
 // DEBUGGER: r
 // DEBUGGER: ptype MyClass
 // CHECK:      {{struct|class}} MyClass {
-// CHECK:      static const int a;
+// CHECK:      static const int a
 // CHECK-NEXT: static int b;
 // CHECK-NEXT: static int c;
-// CHECK-NEXT: int d;
+// CHECK:      int d;
 // CHECK-NEXT: }
 // DEBUGGER: p MyClass::a
 // CHECK: ${{[0-9]}} = 4
-- 
cgit v1.2.3


From 418f0066ebfcde8cd72d33cc103fc1c3e36a1205 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Tue, 12 Mar 2024 16:11:45 -0700
Subject: Modernize llgdb script and make it easier to debug.

---
 .../debuginfo-tests/llgdb-tests/test_debuginfo.pl          | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/cross-project-tests/debuginfo-tests/llgdb-tests/test_debuginfo.pl b/cross-project-tests/debuginfo-tests/llgdb-tests/test_debuginfo.pl
index 6dbc3b9b8632..fa52a5037c21 100755
--- a/cross-project-tests/debuginfo-tests/llgdb-tests/test_debuginfo.pl
+++ b/cross-project-tests/debuginfo-tests/llgdb-tests/test_debuginfo.pl
@@ -56,7 +56,7 @@ my $my_debugger = $ENV{'DEBUGGER'};
 if (!$my_debugger) {
     if ($use_lldb) {
         my $path = dirname(Cwd::abs_path($0));
-        $my_debugger = "/usr/bin/env python3 $path/llgdb.py";
+        $my_debugger = "/usr/bin/xcrun python3 $path/llgdb.py";
     } else {
         $my_debugger = "gdb";
     }
@@ -66,12 +66,18 @@ if (!$my_debugger) {
 my $debugger_options = "-q -batch -n -x";
 
 # run debugger and capture output.
+print("Running debugger\n");
 system("$my_debugger $debugger_options $debugger_script_file $executable_file > $output_file 2>&1");
-
+if ($?) {
+    print("Debugger output was:\n");
+    system("cat", "$output_file");
+    exit 1;
+}
 # validate output.
+print("Running FileCheck\n");
 system("FileCheck", "-input-file", "$output_file", "$testcase_file");
-if ($?>>8 == 1) {
-    print "Debugger output was:\n";
+if ($?) {
+    print("Debugger output was:\n");
     system("cat", "$output_file");
     exit 1;
 }
-- 
cgit v1.2.3


From 65f07b804c2c05cf49bd043f2a6e9a0020198165 Mon Sep 17 00:00:00 2001
From: anbbna <117081688+anbbna@users.noreply.github.com>
Date: Wed, 13 Mar 2024 07:27:18 +0800
Subject: [MIPS] Introduce NAL instruction support for Mipsr6 and prer6
 (#84429)

NAL is an assembly idiom on Pre-R6 instruction sets (which is
implemented in binutils), or an actual instruction on Release 6
instruction set, and is used to read the PC, due to the nature of the
MIPS architecture.

Since we can't read the PC directly, on pre-R6 we use a always-not-taken
Branch and Link operation to the address of the next instruction, which
effectively writes the address to $31, thus PC is read with offset +8.

MIPS Release 6 removed the conventional Branch and Link instructions,
but kept NAL as an actual instruction for compatibility on the assembly
level. The instruction has the same encoding of the pre-R6 ones, and
with the same behavior: PC + 8 -> $31.
---
 llvm/lib/Target/Mips/Mips32r6InstrFormats.td | 11 +++++++++++
 llvm/lib/Target/Mips/Mips32r6InstrInfo.td    | 16 +++++++++++++++-
 llvm/lib/Target/Mips/MipsInstrInfo.td        |  3 +++
 llvm/lib/Target/Mips/MipsScheduleGeneric.td  |  2 +-
 llvm/test/MC/Mips/mips32/nal.s               | 14 ++++++++++++++
 llvm/test/MC/Mips/mips32r6/nal.s             | 21 +++++++++++++++++++++
 6 files changed, 65 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/MC/Mips/mips32/nal.s
 create mode 100644 llvm/test/MC/Mips/mips32r6/nal.s

diff --git a/llvm/lib/Target/Mips/Mips32r6InstrFormats.td b/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
index ccb6d1df777a..8536c52028c3 100644
--- a/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
+++ b/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
@@ -86,6 +86,7 @@ def OPCODE5_BC1NEZ : OPCODE5<0b01101>;
 def OPCODE5_BC2EQZ : OPCODE5<0b01001>;
 def OPCODE5_BC2NEZ : OPCODE5<0b01101>;
 def OPCODE5_BGEZAL : OPCODE5<0b10001>;
+def OPCODE5_NAL : OPCODE5<0b10000>;
 def OPCODE5_SIGRIE : OPCODE5<0b10111>;
 // The next four constants are unnamed in the spec. These names are taken from
 // the OPGROUP names they are used with.
@@ -201,6 +202,16 @@ class BAL_FM : MipsR6Inst {
   let Inst{15-0} = offset;
 }
 
+// NAL for Release 6
+class NAL_FM : MipsR6Inst {
+  bits<32> Inst;
+
+  let Inst{31-26} = OPGROUP_REGIMM.Value;
+  let Inst{25-21} = 0b00000; 
+  let Inst{20-16} = OPCODE5_NAL.Value;
+  let Inst{15-0} = 0x00;
+}
+
 class COP0_EVP_DVP_FM<bits<1> sc> : MipsR6Inst {
   bits<5> rt;
 
diff --git a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index 854563ab32bd..9c29acbd0d8a 100644
--- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -73,6 +73,7 @@ class AUI_ENC    : AUI_FM;
 class AUIPC_ENC  : PCREL16_FM<OPCODE5_AUIPC>;
 
 class BAL_ENC   : BAL_FM;
+class NAL_ENC   : NAL_FM;
 class BALC_ENC  : BRANCH_OFF26_FM<0b111010>;
 class BC_ENC    : BRANCH_OFF26_FM<0b110010>;
 class BEQC_ENC  : CMP_BRANCH_2R_OFF16_FM<OPGROUP_ADDI>,
@@ -381,6 +382,12 @@ class BC_DESC_BASE<string instr_asm, DAGOperand opnd> : BRANCH_DESC_BASE,
   bit isCTI = 1;
 }
 
+class NAL_DESC_BASE<string instr_asm> : BRANCH_DESC_BASE,
+    MipsR6Arch<instr_asm> {
+  string AsmString = instr_asm;
+  bit isCTI = 1;
+}
+
 class CMP_BC_DESC_BASE<string instr_asm, DAGOperand opnd,
                        RegisterOperand GPROpnd> : BRANCH_DESC_BASE,
                                                   MipsR6Arch<instr_asm> {
@@ -424,6 +431,12 @@ class BAL_DESC : BC_DESC_BASE<"bal", brtarget> {
   bit isCTI = 1;
 }
 
+class NAL_DESC : NAL_DESC_BASE<"nal"> {
+  bit hasDelaySlot = 1;
+  list<Register> Defs = [RA];
+  bit isCTI = 1;
+}
+
 class BALC_DESC : BC_DESC_BASE<"balc", brtarget26> {
   bit isCall = 1;
   list<Register> Defs = [RA];
@@ -868,6 +881,8 @@ def AUI : R6MMR6Rel, AUI_ENC, AUI_DESC, ISA_MIPS32R6;
 def AUIPC : R6MMR6Rel, AUIPC_ENC, AUIPC_DESC, ISA_MIPS32R6;
 def BAL : BAL_ENC, BAL_DESC, ISA_MIPS32R6;
 def BALC : R6MMR6Rel, BALC_ENC, BALC_DESC, ISA_MIPS32R6;
+def NAL : NAL_ENC, NAL_DESC, ISA_MIPS32R6;
+
 let AdditionalPredicates = [NotInMicroMips] in {
   def BC1EQZ : BC1EQZ_ENC, BC1EQZ_DESC, ISA_MIPS32R6, HARDFLOAT;
   def BC1NEZ : BC1NEZ_ENC, BC1NEZ_DESC, ISA_MIPS32R6, HARDFLOAT;
@@ -948,7 +963,6 @@ let AdditionalPredicates = [NotInMicroMips] in {
   def MUL_R6 : R6MMR6Rel, MUL_R6_ENC, MUL_R6_DESC, ISA_MIPS32R6;
   def MULU   : R6MMR6Rel, MULU_ENC, MULU_DESC, ISA_MIPS32R6;
 }
-def NAL; // BAL with rd=0
 let AdditionalPredicates = [NotInMicroMips] in {
   def PREF_R6 : R6MMR6Rel, PREF_ENC, PREF_DESC, ISA_MIPS32R6;
   def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6, HARDFLOAT;
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td
index 4b6f4b22e71b..23e04c442bf6 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -3049,6 +3049,9 @@ def : MipsInstAlias<"divu $rd, $imm", (UDivIMacro GPR32Opnd:$rd, GPR32Opnd:$rd,
                                                   simm32:$imm), 0>,
       ISA_MIPS1_NOT_32R6_64R6;
 
+
+def : MipsInstAlias<"nal", (BLTZAL ZERO, 0), 1>, ISA_MIPS1_NOT_32R6_64R6;
+
 def SRemMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
                                   (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
                                   "rem\t$rd, $rs, $rt">,
diff --git a/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/llvm/lib/Target/Mips/MipsScheduleGeneric.td
index a3df88a93cfb..6771a897eea7 100644
--- a/llvm/lib/Target/Mips/MipsScheduleGeneric.td
+++ b/llvm/lib/Target/Mips/MipsScheduleGeneric.td
@@ -285,7 +285,7 @@ def GenericWriteJumpAndLink : SchedWriteRes<[GenericIssueCTISTD]> {
 // jalr, jr.hb, jr, jalr.hb, jarlc, jialc
 def : InstRW<[GenericWriteJump], (instrs B, BAL, BAL_BR, BEQ, BNE, BGTZ, BGEZ,
                                   BLEZ, BLTZ, BLTZAL, J, JALX, JR, JR_HB, ERET,
-                                  ERet, ERETNC, DERET)>;
+                                  ERet, ERETNC, DERET, NAL)>;
 
 def : InstRW<[GenericWriteJump], (instrs BEQL, BNEL, BGEZL, BGTZL, BLEZL,
                                   BLTZL)>;
diff --git a/llvm/test/MC/Mips/mips32/nal.s b/llvm/test/MC/Mips/mips32/nal.s
new file mode 100644
index 000000000000..72c723108586
--- /dev/null
+++ b/llvm/test/MC/Mips/mips32/nal.s
@@ -0,0 +1,14 @@
+# RUN: llvm-mc %s -triple=mipsel-linux-gnu -filetype=obj -o - | \
+# RUN:		 llvm-objdump --no-print-imm-hex -d - | FileCheck %s --check-prefix=MIPS32-EL
+# RUN: llvm-mc %s -triple=mips-linux-gnu -filetype=obj -o - | \
+# RUN:		 llvm-objdump --no-print-imm-hex -d - | FileCheck %s --check-prefix=MIPS32-EB
+
+# Whether it is a macro or an actual instruction, it always has a delay slot.
+# Ensure the delay slot is filled correctly.
+# MIPS32-EL:		00 00 10 04   bltzal  $zero, 0x4
+# MIPS32-EL-NEXT: 	00 00 00 00   nop
+# MIPS32-EB:		04 10 00 00   bltzal  $zero, 0x4
+# MIPS32-EB-NEXT:	00 00 00 00   nop
+
+nal_test:
+	nal
diff --git a/llvm/test/MC/Mips/mips32r6/nal.s b/llvm/test/MC/Mips/mips32r6/nal.s
new file mode 100644
index 000000000000..94c1a774f47e
--- /dev/null
+++ b/llvm/test/MC/Mips/mips32r6/nal.s
@@ -0,0 +1,21 @@
+# RUN: llvm-mc %s -triple=mipsisa32r6el-linux-gnu -filetype=obj -o - | \
+# RUN:		 llvm-objdump --no-print-imm-hex -d - | FileCheck %s --check-prefix=MIPS32R6-EL
+# RUN: llvm-mc %s -triple=mipsisa32r6-linux-gnu -filetype=obj -o - | \
+# RUN: 		 llvm-objdump --no-print-imm-hex -d - | FileCheck %s --check-prefix=MIPS32R6-EB
+
+# Whether it is a macro or an actual instruction, it always has a delay slot.
+# Ensure the delay slot is filled correctly.
+# Also ensure that NAL does not reside in a forbidden slot.
+# MIPS32R6-EL:		00 00 80 f8   bnezc	$4, 0x4
+# MIPS32R6-EL-NEXT:	00 00 00 00   nop
+# MIPS32R6-EL:		00 00 10 04   nal
+# MIPS32R6-EL-NEXT:	00 00 00 00   nop
+# MIPS32R6-EB:		f8 80 00 00   bnezc	$4, 0x4
+# MIPS32R6-EB-NEXT:	00 00 00 00   nop
+# MIPS32R6-EB:		04 10 00 00   nal
+# MIPS32R6-EB-NEXT:	00 00 00 00   nop
+
+nal_test:
+	# We generate a fobidden solt just for testing.
+	bnezc $a0, 0
+	nal
-- 
cgit v1.2.3