author    Vitaly Buka <vitalybuka@google.com>  2024-03-27 10:34:16 -0700
committer Vitaly Buka <vitalybuka@google.com>  2024-03-27 10:34:16 -0700
commit    fe88e0c9dd5aaf8d86aed1f4935dc7ac3e4a97d4 (patch)
tree      d1093b3d86fc3ac1bc51c6d01288bb923932c012
parent    9386aff1a04de43eaf7f337ba51bd4cc641199c9 (diff)
[spr] changes introduced through rebase (upstream/users/vitalybuka/spr/main.nfcir-add-setnosanitize-helpers)
Created using spr 1.3.4 [skip ci]
-rw-r--r--.github/workflows/scorecard.yml2
-rw-r--r--clang/docs/ReleaseNotes.rst1
-rw-r--r--clang/docs/analyzer/checkers.rst83
-rw-r--r--clang/include/clang-c/Index.h1
-rw-r--r--clang/include/clang/Basic/Attr.td7
-rw-r--r--clang/include/clang/Basic/AttrDocs.td11
-rw-r--r--clang/include/clang/Basic/DiagnosticInstallAPIKinds.td1
-rw-r--r--clang/include/clang/Basic/Specifiers.h43
-rw-r--r--clang/include/clang/InstallAPI/HeaderFile.h16
-rw-r--r--clang/include/clang/Sema/Sema.h3
-rw-r--r--clang/include/clang/StaticAnalyzer/Checkers/Checkers.td2
-rw-r--r--clang/lib/AST/ItaniumMangle.cpp1
-rw-r--r--clang/lib/AST/Type.cpp4
-rw-r--r--clang/lib/AST/TypePrinter.cpp6
-rw-r--r--clang/lib/Basic/Targets/RISCV.cpp11
-rw-r--r--clang/lib/Basic/Targets/RISCV.h2
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp6
-rw-r--r--clang/lib/CodeGen/CGCall.cpp6
-rw-r--r--clang/lib/CodeGen/CGDebugInfo.cpp2
-rw-r--r--clang/lib/Sema/Sema.cpp7
-rw-r--r--clang/lib/Sema/SemaAPINotes.cpp41
-rw-r--r--clang/lib/Sema/SemaChecking.cpp70
-rw-r--r--clang/lib/Sema/SemaConcept.cpp12
-rw-r--r--clang/lib/Sema/SemaDecl.cpp9
-rw-r--r--clang/lib/Sema/SemaDeclAttr.cpp7
-rw-r--r--clang/lib/Sema/SemaObjCProperty.cpp4
-rw-r--r--clang/lib/Sema/SemaType.cpp5
-rw-r--r--clang/test/APINotes/Inputs/APINotes/SomeOtherKit.apinotes8
-rw-r--r--clang/test/APINotes/Inputs/BrokenHeaders/APINotes.apinotes5
-rw-r--r--clang/test/APINotes/Inputs/BrokenHeaders/SomeBrokenLib.h6
-rw-r--r--clang/test/APINotes/Inputs/BrokenHeaders2/APINotes.apinotes7
-rw-r--r--clang/test/APINotes/Inputs/BrokenHeaders2/SomeBrokenLib.h6
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Headers/FrameworkWithActualPrivateModule.h1
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.modulemap5
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.private.modulemap5
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.apinotes1
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.h2
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Headers/FrameworkWithWrongCase.h1
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Modules/module.modulemap5
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/PrivateHeaders/FrameworkWithWrongCase_Private.apinotes1
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Headers/FrameworkWithWrongCasePrivate.h1
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.modulemap5
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.private.modulemap1
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/PrivateHeaders/FrameworkWithWrongCasePrivate_Private.apinotes1
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Headers/LayeredKit.h11
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Modules/module.modulemap5
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.apinotes9
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.h7
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Modules/module.modulemap5
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Modules/module.modulemap5
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit.apinotes74
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit_private.apinotes15
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKitForNullAnnotation.h55
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.modulemap5
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.private.modulemap8
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module_private.modulemap8
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_Private.h16
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_PrivateForNullAnnotation.h17
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_private.apinotes15
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/APINotes/SomeOtherKit.apinotes8
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.apinotes8
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.h9
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Modules/module.modulemap5
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit.h1
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit_Private.apinotes1
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.modulemap5
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.private.modulemap5
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit.apinotes1
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.apinotes4
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.h1
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private_private.apinotes1
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.apinotes156
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.h137
-rw-r--r--clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Modules/module.modulemap5
-rw-r--r--clang/test/APINotes/Inputs/Headers/APINotes.apinotes18
-rw-r--r--clang/test/APINotes/Inputs/Headers/BrokenTypes.apinotes10
-rw-r--r--clang/test/APINotes/Inputs/Headers/BrokenTypes.h8
-rw-r--r--clang/test/APINotes/Inputs/Headers/ExternCtx.apinotes15
-rw-r--r--clang/test/APINotes/Inputs/Headers/ExternCtx.h11
-rw-r--r--clang/test/APINotes/Inputs/Headers/HeaderLib.apinotes37
-rw-r--r--clang/test/APINotes/Inputs/Headers/HeaderLib.h19
-rw-r--r--clang/test/APINotes/Inputs/Headers/InstancetypeModule.apinotes10
-rw-r--r--clang/test/APINotes/Inputs/Headers/InstancetypeModule.h10
-rw-r--r--clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase.h1
-rw-r--r--clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate.h1
-rw-r--r--clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate_Private.apinotes1
-rw-r--r--clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase_Private.apinotes1
-rw-r--r--clang/test/APINotes/Inputs/Headers/Namespaces.apinotes53
-rw-r--r--clang/test/APINotes/Inputs/Headers/Namespaces.h39
-rw-r--r--clang/test/APINotes/Inputs/Headers/PrivateLib.apinotes4
-rw-r--r--clang/test/APINotes/Inputs/Headers/PrivateLib.h1
-rw-r--r--clang/test/APINotes/Inputs/Headers/PrivateLib_private.apinotes1
-rw-r--r--clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes9
-rw-r--r--clang/test/APINotes/Inputs/Headers/SwiftImportAs.h6
-rw-r--r--clang/test/APINotes/Inputs/Headers/module.modulemap31
-rw-r--r--clang/test/APINotes/Inputs/Headers/module.private.modulemap5
-rw-r--r--clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.apinotes65
-rw-r--r--clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.h1
-rw-r--r--clang/test/APINotes/Inputs/yaml-reader-errors/module.modulemap3
-rw-r--r--clang/test/APINotes/availability.m48
-rw-r--r--clang/test/APINotes/broken_types.m19
-rw-r--r--clang/test/APINotes/case-for-private-apinotes-file.c22
-rw-r--r--clang/test/APINotes/extern-context.cpp23
-rw-r--r--clang/test/APINotes/instancetype.m9
-rw-r--r--clang/test/APINotes/module-cache.m65
-rw-r--r--clang/test/APINotes/namespaces.cpp69
-rw-r--r--clang/test/APINotes/nullability.c21
-rw-r--r--clang/test/APINotes/nullability.m46
-rw-r--r--clang/test/APINotes/objc-forward-declarations.m12
-rw-r--r--clang/test/APINotes/objc_designated_inits.m17
-rw-r--r--clang/test/APINotes/properties.m42
-rw-r--r--clang/test/APINotes/retain-count-convention.m38
-rw-r--r--clang/test/APINotes/search-order.m25
-rw-r--r--clang/test/APINotes/swift-import-as.cpp16
-rw-r--r--clang/test/APINotes/top-level-private-modules.c8
-rw-r--r--clang/test/APINotes/types.m28
-rw-r--r--clang/test/APINotes/versioned-multi.c69
-rw-r--r--clang/test/APINotes/versioned.m187
-rw-r--r--clang/test/APINotes/yaml-convert-diags.c6
-rw-r--r--clang/test/APINotes/yaml-parse-diags.c6
-rw-r--r--clang/test/APINotes/yaml-reader-errors.m5
-rw-r--r--clang/test/CodeGen/CSKY/csky-abi.c16
-rw-r--r--clang/test/CodeGen/LoongArch/abi-lp64d.c4
-rw-r--r--clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c4
-rw-r--r--clang/test/CodeGen/PowerPC/aix-vaargs.c14
-rw-r--r--clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c18
-rw-r--r--clang/test/CodeGen/RISCV/riscv-func-attr-target-err.c22
-rw-r--r--clang/test/CodeGen/RISCV/riscv-func-attr-target.c33
-rw-r--r--clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c34
-rw-r--r--clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp32
-rw-r--r--clang/test/CodeGen/RISCV/riscv-vector-callingconv.c17
-rw-r--r--clang/test/CodeGen/RISCV/riscv-vector-callingconv.cpp35
-rw-r--r--clang/test/CodeGen/RISCV/riscv32-vararg.c40
-rw-r--r--clang/test/CodeGen/RISCV/riscv64-vararg.c16
-rw-r--r--clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c4
-rw-r--r--clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkb-error.c12
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-error.c2
-rw-r--r--clang/test/CodeGen/WebAssembly/wasm-varargs.c16
-rw-r--r--clang/test/CodeGen/X86/va-arg-sse.c4
-rw-r--r--clang/test/CodeGen/X86/x86_64-vaarg.c4
-rw-r--r--clang/test/CodeGen/aarch64-ABI-align-packed.c14
-rw-r--r--clang/test/CodeGen/aarch64-varargs.c2
-rw-r--r--clang/test/CodeGen/arm-varargs.c2
-rw-r--r--clang/test/CodeGen/hexagon-linux-vararg.c2
-rw-r--r--clang/test/CodeGen/mips-varargs.c16
-rw-r--r--clang/test/CodeGen/pr53127.cpp4
-rw-r--r--clang/test/CodeGen/varargs-with-nonzero-default-address-space.c46
-rw-r--r--clang/test/CodeGen/xcore-abi.c2
-rw-r--r--clang/test/CodeGenCXX/ext-int.cpp12
-rw-r--r--clang/test/CodeGenCXX/ibm128-declarations.cpp4
-rw-r--r--clang/test/CodeGenCXX/x86_64-vaarg.cpp4
-rw-r--r--clang/test/Driver/aarch64-sve.c9
-rw-r--r--clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/AAA.h3
-rw-r--r--clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/SpecialUmbrella.h1
-rw-r--r--clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/AAA_Private.h3
-rw-r--r--clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/SpecialPrivateUmbrella.h1
-rw-r--r--clang/test/InstallAPI/umbrella-headers-unix.test40
-rw-r--r--clang/test/InstallAPI/umbrella-headers.test48
-rw-r--r--clang/test/Modules/codegen.test2
-rw-r--r--clang/test/Preprocessor/aarch64-target-features.c2
-rw-r--r--clang/test/SemaTemplate/concepts.cpp10
-rw-r--r--clang/tools/clang-installapi/InstallAPIOpts.td12
-rw-r--r--clang/tools/clang-installapi/Options.cpp79
-rw-r--r--clang/tools/clang-installapi/Options.h9
-rw-r--r--clang/tools/libclang/CXType.cpp1
-rw-r--r--clang/utils/TableGen/RISCVVEmitter.cpp4
-rw-r--r--compiler-rt/lib/scudo/standalone/tests/strings_test.cpp2
-rw-r--r--flang/lib/Lower/OpenMP/ClauseProcessor.h2
-rw-r--r--flang/lib/Lower/OpenMP/Clauses.cpp52
-rw-r--r--flang/lib/Lower/OpenMP/Clauses.h8
-rw-r--r--flang/lib/Lower/OpenMP/DataSharingProcessor.h2
-rw-r--r--flang/lib/Lower/OpenMP/OpenMP.cpp4
-rw-r--r--libc/docs/dev/code_style.rst2
-rw-r--r--libc/src/__support/CPP/CMakeLists.txt2
-rw-r--r--libc/src/__support/CPP/atomic.h74
-rw-r--r--libc/src/__support/CPP/bit.h25
-rw-r--r--libc/src/__support/CPP/type_traits/add_pointer.h1
-rw-r--r--libc/src/__support/CPP/type_traits/decay.h1
-rw-r--r--libc/src/__support/CPP/type_traits/is_destructible.h3
-rw-r--r--libc/src/__support/CPP/type_traits/is_function.h3
-rw-r--r--libc/src/__support/CPP/type_traits/is_lvalue_reference.h3
-rw-r--r--libc/src/__support/CPP/type_traits/is_reference.h3
-rw-r--r--libc/src/__support/CPP/type_traits/is_rvalue_reference.h3
-rw-r--r--libc/src/__support/CPP/type_traits/is_trivially_copyable.h1
-rw-r--r--libc/src/__support/CPP/type_traits/is_trivially_destructible.h5
-rw-r--r--libc/src/__support/CPP/type_traits/remove_all_extents.h3
-rw-r--r--libc/src/__support/FPUtil/CMakeLists.txt1
-rw-r--r--libc/src/__support/FPUtil/FEnvImpl.h1
-rw-r--r--libc/src/__support/FPUtil/gpu/FMA.h8
-rw-r--r--libc/src/__support/UInt.h54
-rw-r--r--libc/src/__support/macros/config.h18
-rw-r--r--libc/src/__support/macros/optimization.h1
-rw-r--r--libc/src/__support/macros/sanitizer.h3
-rw-r--r--libc/src/__support/math_extras.h9
-rw-r--r--libc/src/__support/memory_size.h2
-rw-r--r--libc/src/string/memory_utils/generic/builtin.h8
-rw-r--r--libc/src/string/memory_utils/utils.h5
-rw-r--r--libc/test/src/__support/CPP/bit_test.cpp9
-rw-r--r--libc/test/src/__support/math_extras_test.cpp4
-rw-r--r--libc/utils/gpu/server/rpc_server.cpp5
-rw-r--r--libcxx/include/__algorithm/copy.h6
-rw-r--r--libcxx/include/__algorithm/copy_backward.h6
-rw-r--r--libcxx/include/__algorithm/copy_move_common.h39
-rw-r--r--libcxx/include/__algorithm/move.h6
-rw-r--r--libcxx/include/__algorithm/move_backward.h6
-rw-r--r--libcxx/test/std/numerics/numeric.ops/numeric.ops.sat/saturate_cast.pass.cpp2
-rw-r--r--lld/COFF/Config.h8
-rw-r--r--lld/COFF/Driver.cpp2
-rw-r--r--lld/COFF/DriverUtils.cpp14
-rw-r--r--lld/ELF/Relocations.cpp11
-rw-r--r--lld/test/COFF/exportas.test88
-rw-r--r--lld/test/ELF/pack-dyn-relocs-ifunc.s49
-rw-r--r--lldb/include/lldb/Symbol/UnwindTable.h4
-rw-r--r--lldb/source/Core/Module.cpp2
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp1
-rw-r--r--lldb/source/Symbol/UnwindTable.cpp45
-rw-r--r--lldb/source/Target/StackFrame.cpp1
-rw-r--r--lldb/test/Shell/SymbolFile/target-symbols-add-unwind.test27
-rw-r--r--llvm/docs/LangRef.rst29
-rw-r--r--llvm/docs/ReleaseNotes.rst1
-rw-r--r--llvm/include/llvm/AsmParser/LLToken.h1
-rw-r--r--llvm/include/llvm/BinaryFormat/Dwarf.def1
-rw-r--r--llvm/include/llvm/IR/CallingConv.h3
-rw-r--r--llvm/include/llvm/IR/Intrinsics.td11
-rw-r--r--llvm/include/llvm/IR/Verifier.h1
-rw-r--r--llvm/include/llvm/Object/GOFF.h20
-rw-r--r--llvm/include/llvm/Object/GOFFObjectFile.h56
-rw-r--r--llvm/lib/AsmParser/LLLexer.cpp1
-rw-r--r--llvm/lib/AsmParser/LLParser.cpp4
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp12
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp83
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp3
-rw-r--r--llvm/lib/IR/AsmWriter.cpp3
-rw-r--r--llvm/lib/IR/Verifier.cpp32
-rw-r--r--llvm/lib/Object/COFFImportFile.cpp4
-rw-r--r--llvm/lib/Object/GOFFObjectFile.cpp167
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp40
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h11
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp194
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp98
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h54
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp404
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h33
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt1
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp41
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h7
-rw-r--r--llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp1
-rw-r--r--llvm/lib/Target/ARM/Thumb1FrameLowering.cpp11
-rw-r--r--llvm/lib/Target/RISCV/RISCVCallingConv.td13
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.cpp97
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp1
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp15
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp20
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp85
-rw-r--r--llvm/lib/TargetParser/AArch64TargetParser.cpp5
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp28
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp3
-rw-r--r--llvm/test/Analysis/CostModel/RISCV/cast.ll920
-rw-r--r--llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll12
-rw-r--r--llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll84
-rw-r--r--llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll84
-rw-r--r--llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll2
-rw-r--r--llvm/test/Bitcode/compatibility-3.6.ll16
-rw-r--r--llvm/test/Bitcode/compatibility-3.7.ll16
-rw-r--r--llvm/test/Bitcode/compatibility-3.8.ll16
-rw-r--r--llvm/test/Bitcode/compatibility-3.9.ll16
-rw-r--r--llvm/test/Bitcode/compatibility-4.0.ll16
-rw-r--r--llvm/test/Bitcode/compatibility-5.0.ll16
-rw-r--r--llvm/test/Bitcode/compatibility-6.0.ll16
-rw-r--r--llvm/test/Bitcode/compatibility.ll18
-rw-r--r--llvm/test/Bitcode/thinlto-function-summary.ll6
-rw-r--r--llvm/test/Bitcode/variableArgumentIntrinsic.3.2.ll8
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-abi_align.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll12
-rw-r--r--llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll564
-rw-r--r--llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll564
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/binop-zext.ll146
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll95
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll16
-rw-r--r--llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir25
-rw-r--r--llvm/test/CodeGen/X86/combine-pavg.ll30
-rw-r--r--llvm/test/CodeGen/X86/extractelement-load.ll153
-rw-r--r--llvm/test/CodeGen/X86/huge-stack-offset.ll4
-rw-r--r--llvm/test/CodeGen/X86/huge-stack-offset2.ll2
-rw-r--r--llvm/test/CodeGen/X86/masked_store.ll793
-rw-r--r--llvm/test/CodeGen/X86/pr45378.ll40
-rw-r--r--llvm/test/CodeGen/X86/setcc-non-simple-type.ll36
-rw-r--r--llvm/test/CodeGen/X86/shrink_vmul.ll223
-rw-r--r--llvm/test/CodeGen/X86/stack-protector.ll9
-rw-r--r--llvm/test/CodeGen/X86/var-permute-128.ll32
-rw-r--r--llvm/test/CodeGen/X86/vec_int_to_fp.ll305
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll48
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll2
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/X86/vararg_shadow.ll48
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/msan_debug_info.ll2
-rw-r--r--llvm/test/MC/AMDGPU/hsa-amdgpu-exprs.s27
-rw-r--r--llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s281
-rw-r--r--llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s190
-rw-r--r--llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s186
-rw-r--r--llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s184
-rw-r--r--llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s168
-rw-r--r--llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s171
-rw-r--r--llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s148
-rw-r--r--llvm/test/MC/AMDGPU/hsa-tg-split.s74
-rw-r--r--llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll2
-rw-r--r--llvm/test/Transforms/IROutliner/illegal-vaarg.ll12
-rw-r--r--llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll8
-rw-r--r--llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll2
-rw-r--r--llvm/test/Transforms/InstCombine/powi.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll2
-rw-r--r--llvm/test/Transforms/NewGVN/pr31483.ll2
-rw-r--r--llvm/test/Transforms/Reassociate/vaarg_movable.ll4
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll95
-rw-r--r--llvm/test/Transforms/SROA/tbaa-struct3.ll2
-rw-r--r--llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll3
-rw-r--r--llvm/test/Transforms/Scalarizer/basic.ll3
-rw-r--r--llvm/test/Verifier/tbaa-struct.ll14
-rw-r--r--llvm/test/tools/llvm-lib/arm64ec-implib.test93
-rw-r--r--llvm/unittests/Object/GOFFObjectFileTest.cpp97
-rw-r--r--llvm/unittests/TargetParser/TargetParserTest.cpp15
-rw-r--r--llvm/utils/TableGen/Common/CMakeLists.txt2
-rw-r--r--llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.cpp23
-rw-r--r--llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.h4
-rw-r--r--llvm/utils/TableGen/Common/GlobalISel/PatternParser.cpp462
-rw-r--r--llvm/utils/TableGen/Common/GlobalISel/PatternParser.h118
-rw-r--r--llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp479
-rw-r--r--llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/MCTargetDesc/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/llvm/utils/TableGen/Common/BUILD.gn2
-rw-r--r--mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td6
-rw-r--r--mlir/include/mlir/IR/Dialect.h4
-rw-r--r--mlir/lib/Dialect/Arith/IR/ArithDialect.cpp6
-rw-r--r--mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp2
-rw-r--r--mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp6
-rw-r--r--mlir/lib/Dialect/Func/IR/FuncOps.cpp4
-rw-r--r--mlir/lib/Dialect/GPU/IR/GPUDialect.cpp4
-rw-r--r--mlir/lib/Dialect/Index/IR/IndexDialect.cpp2
-rw-r--r--mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp4
-rw-r--r--mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp2
-rw-r--r--mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp12
-rw-r--r--mlir/lib/Dialect/Math/IR/MathDialect.cpp2
-rw-r--r--mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp4
-rw-r--r--mlir/lib/Dialect/SCF/IR/SCF.cpp2
-rw-r--r--mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp2
-rw-r--r--mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp2
-rw-r--r--mlir/lib/Dialect/UB/IR/UBOps.cpp2
-rw-r--r--mlir/lib/Dialect/Vector/IR/VectorOps.cpp4
-rw-r--r--mlir/test/Target/LLVMIR/Import/basic.ll14
-rw-r--r--mlir/test/Target/LLVMIR/Import/intrinsic.ll12
-rw-r--r--mlir/test/Target/LLVMIR/llvmir.mlir8
-rw-r--r--mlir/unittests/IR/InterfaceAttachmentTest.cpp4
-rw-r--r--openmp/runtime/src/kmp_runtime.cpp10
-rw-r--r--utils/bazel/llvm-project-overlay/libc/BUILD.bazel4
-rw-r--r--utils/bazel/llvm-project-overlay/llvm/BUILD.bazel4
355 files changed, 8165 insertions, 4319 deletions
diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
index b8e8ab26c3ff..ff61cf83a6af 100644
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@@ -36,7 +36,7 @@ jobs:
persist-credentials: false
- name: "Run analysis"
- uses: ossf/scorecard-action@e38b1902ae4f44df626f11ba0734b14fb91f8f86 # v2.1.2
+ uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1
with:
results_file: results.sarif
results_format: sarif
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 0dd026a5de5c..0fdd9e3fb3ee 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -454,6 +454,7 @@ Bug Fixes to C++ Support
- Fix a crash when instantiating a lambda that captures ``this`` outside of its context. Fixes (#GH85343).
- Fix an issue where a namespace alias could be defined using a qualified name (all name components
following the first `::` were ignored).
+- Fix an out-of-bounds crash when checking the validity of template partial specializations. (part of #GH86757).
Bug Fixes to AST Handling
^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 66da1c7b35f2..8af99a021ebd 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -849,10 +849,89 @@ Check for performance anti-patterns when using Grand Central Dispatch.
.. _optin-performance-Padding:
-optin.performance.Padding
-"""""""""""""""""""""""""
+optin.performance.Padding (C, C++, ObjC)
+""""""""""""""""""""""""""""""""""""""""
Check for excessively padded structs.
+This checker detects structs with excessive padding, which can waste memory
+and decrease performance by reducing the effectiveness of the processor cache.
+Compilers insert padding bytes to align data accesses, because some processors
+require data to be aligned to certain boundaries. On others, unaligned data
+accesses are possible, but impose significantly larger latencies.
+
+To minimize padding bytes, the fields of a struct should be ordered by
+decreasing alignment. Usually, it is easier to think of the ``sizeof`` of the
+fields, and ordering the fields by decreasing ``sizeof`` usually leads to the
+same optimal layout.
+
+In rare cases, the ``#pragma pack(1)`` directive can be used to enforce a packed
+layout too, but it can significantly increase access times, so reordering the
+fields is usually the better solution.
+
+
+.. code-block:: cpp
+
+ // warn: Excessive padding in 'struct NonOptimal' (35 padding bytes, where 3 is optimal)
+ struct NonOptimal {
+ char c1;
+ // 7 bytes of padding
+ std::int64_t big1; // 8 bytes
+ char c2;
+ // 7 bytes of padding
+ std::int64_t big2; // 8 bytes
+ char c3;
+ // 7 bytes of padding
+ std::int64_t big3; // 8 bytes
+ char c4;
+ // 7 bytes of padding
+ std::int64_t big4; // 8 bytes
+ char c5;
+ // 7 bytes of padding
+ };
+ static_assert(sizeof(NonOptimal) == 4*8+5+5*7);
+
+ // no-warning: The fields are nicely aligned to have the minimal amount of padding bytes.
+ struct Optimal {
+ std::int64_t big1; // 8 bytes
+ std::int64_t big2; // 8 bytes
+ std::int64_t big3; // 8 bytes
+ std::int64_t big4; // 8 bytes
+ char c1;
+ char c2;
+ char c3;
+ char c4;
+ char c5;
+ // 3 bytes of padding
+ };
+ static_assert(sizeof(Optimal) == 4*8+5+3);
+
+ // no-warning: Bit packing representation is also accepted by this checker, but
+ // it can significantly increase access times, so prefer reordering the fields.
+ #pragma pack(1)
+ struct BitPacked {
+ char c1;
+ std::int64_t big1; // 8 bytes
+ char c2;
+ std::int64_t big2; // 8 bytes
+ char c3;
+ std::int64_t big3; // 8 bytes
+ char c4;
+ std::int64_t big4; // 8 bytes
+ char c5;
+ };
+ static_assert(sizeof(BitPacked) == 4*8+5);
+
+The ``AllowedPad`` option can be used to specify a threshold for the number of
+padding bytes that triggers the warning. If the number of padding bytes in the
+struct and the optimal number of padding bytes differ by more than this
+threshold, a warning is raised.
+
+By default, the ``AllowedPad`` threshold is 24 bytes.
+
+To override this threshold, e.g. to 4 bytes, use the
+``-analyzer-config optin.performance.Padding:AllowedPad=4`` option.
+
+
.. _optin-portability-UnixAPI:
optin.portability.UnixAPI
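(Editorial illustration, not part of the patch.) A minimal sketch of how the
``AllowedPad`` threshold changes the checker's verdict; the struct and field
names are hypothetical, and the byte counts assume a typical LP64 layout with
8-byte-aligned ``std::int64_t``:

.. code-block:: cpp

   #include <cstdint>

   // Current layout: 1 + 7 (padding) + 8 + 1 + 7 (tail padding) = 24 bytes,
   // i.e. 14 padding bytes. The optimal layout below needs only 6, so the
   // difference is 8: silent under the default AllowedPad of 24, but reported
   // once the threshold is lowered with
   //   -analyzer-config optin.performance.Padding:AllowedPad=4
   struct SlightlyPadded {
     char tag;
     std::int64_t value;
     char flag;
   };
   static_assert(sizeof(SlightlyPadded) == 24);

   // Optimal ordering by decreasing alignment: 8 + 1 + 1 + 6 (tail padding).
   struct Reordered {
     std::int64_t value;
     char tag;
     char flag;
   };
   static_assert(sizeof(Reordered) == 16);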
diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index 60db3cf0966c..7a8bd985a91f 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -2991,6 +2991,7 @@ enum CXCallingConv {
CXCallingConv_AArch64SVEPCS = 18,
CXCallingConv_M68kRTD = 19,
CXCallingConv_PreserveNone = 20,
+ CXCallingConv_RISCVVectorCall = 21,
CXCallingConv_Invalid = 100,
CXCallingConv_Unexposed = 200
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 318d4e5ac5ba..80e607525a0a 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -3011,6 +3011,13 @@ def PreserveNone : DeclOrTypeAttr, TargetSpecificAttr<TargetAnyX86> {
let Documentation = [PreserveNoneDocs];
}
+def RISCVVectorCC: DeclOrTypeAttr, TargetSpecificAttr<TargetRISCV> {
+ let Spellings = [CXX11<"riscv", "vector_cc">,
+ C23<"riscv", "vector_cc">,
+ Clang<"riscv_vector_cc">];
+ let Documentation = [RISCVVectorCCDocs];
+}
+
def Target : InheritableAttr {
let Spellings = [GCC<"target">];
let Args = [StringArgument<"featuresStr">];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 384aebbdf2e3..3ea4d676b4f8 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -5494,6 +5494,17 @@ for clang builtin functions.
}];
}
+def RISCVVectorCCDocs : Documentation {
+ let Category = DocCatCallingConvs;
+ let Heading = "riscv::vector_cc, riscv_vector_cc, clang::riscv_vector_cc";
+ let Content = [{
+The ``riscv_vector_cc`` attribute can be applied to a function. It makes 15
+registers, namely v1-v7 and v24-v31, callee-saved. Callers thus do not need to
+save these registers before function calls, and callees only need to save them
+if they use them.
+ }];
+}
+
def PreferredNameDocs : Documentation {
let Category = DocCatDecl;
let Content = [{
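(Editorial illustration, not part of the patch.) A minimal sketch of the
attribute spellings this change introduces, applied to declarations passing RVV
values. The function names are hypothetical, and ``vint32m1_t`` is assumed to
be available from ``<riscv_vector.h>`` in the given compilation mode:

.. code-block:: cpp

   #include <cstddef>
   #include <riscv_vector.h>

   // C++11 / C23 spelling added by this patch.
   [[riscv::vector_cc]] vint32m1_t scale(vint32m1_t v, std::size_t vl);

   // GNU-style spelling added by this patch.
   __attribute__((riscv_vector_cc)) vint32m1_t shift(vint32m1_t v, std::size_t vl);

   // Under this convention v1-v7 and v24-v31 are callee-saved, so a caller
   // keeping live vector values in those registers does not need to spill
   // them around the call.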
diff --git a/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td b/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td
index 27df731fa286..e3263fe9ccb9 100644
--- a/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td
+++ b/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td
@@ -18,6 +18,7 @@ def err_no_output_file: Error<"no output file specified">;
def err_no_such_header_file : Error<"no such %select{public|private|project}1 header file: '%0'">;
def warn_no_such_excluded_header_file : Warning<"no such excluded %select{public|private}0 header file: '%1'">, InGroup<InstallAPIViolation>;
def warn_glob_did_not_match: Warning<"glob '%0' did not match any header file">, InGroup<InstallAPIViolation>;
+def err_no_such_umbrella_header_file : Error<"%select{public|private|project}1 umbrella header file not found in input: '%0'">;
} // end of command line category.
let CategoryName = "Verification" in {
diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h
index 8586405825cf..fb11e8212f8b 100644
--- a/clang/include/clang/Basic/Specifiers.h
+++ b/clang/include/clang/Basic/Specifiers.h
@@ -273,29 +273,30 @@ namespace clang {
/// CallingConv - Specifies the calling convention that a function uses.
enum CallingConv {
- CC_C, // __attribute__((cdecl))
- CC_X86StdCall, // __attribute__((stdcall))
- CC_X86FastCall, // __attribute__((fastcall))
- CC_X86ThisCall, // __attribute__((thiscall))
- CC_X86VectorCall, // __attribute__((vectorcall))
- CC_X86Pascal, // __attribute__((pascal))
- CC_Win64, // __attribute__((ms_abi))
- CC_X86_64SysV, // __attribute__((sysv_abi))
- CC_X86RegCall, // __attribute__((regcall))
- CC_AAPCS, // __attribute__((pcs("aapcs")))
- CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp")))
- CC_IntelOclBicc, // __attribute__((intel_ocl_bicc))
- CC_SpirFunction, // default for OpenCL functions on SPIR target
- CC_OpenCLKernel, // inferred for OpenCL kernels
- CC_Swift, // __attribute__((swiftcall))
+ CC_C, // __attribute__((cdecl))
+ CC_X86StdCall, // __attribute__((stdcall))
+ CC_X86FastCall, // __attribute__((fastcall))
+ CC_X86ThisCall, // __attribute__((thiscall))
+ CC_X86VectorCall, // __attribute__((vectorcall))
+ CC_X86Pascal, // __attribute__((pascal))
+ CC_Win64, // __attribute__((ms_abi))
+ CC_X86_64SysV, // __attribute__((sysv_abi))
+ CC_X86RegCall, // __attribute__((regcall))
+ CC_AAPCS, // __attribute__((pcs("aapcs")))
+ CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp")))
+ CC_IntelOclBicc, // __attribute__((intel_ocl_bicc))
+ CC_SpirFunction, // default for OpenCL functions on SPIR target
+ CC_OpenCLKernel, // inferred for OpenCL kernels
+ CC_Swift, // __attribute__((swiftcall))
CC_SwiftAsync, // __attribute__((swiftasynccall))
- CC_PreserveMost, // __attribute__((preserve_most))
- CC_PreserveAll, // __attribute__((preserve_all))
+ CC_PreserveMost, // __attribute__((preserve_most))
+ CC_PreserveAll, // __attribute__((preserve_all))
CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs))
- CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs))
- CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel))
- CC_M68kRTD, // __attribute__((m68k_rtd))
- CC_PreserveNone, // __attribute__((preserve_none))
+ CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs))
+ CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel))
+ CC_M68kRTD, // __attribute__((m68k_rtd))
+ CC_PreserveNone, // __attribute__((preserve_none))
+ CC_RISCVVectorCall, // __attribute__((riscv_vector_cc))
};
/// Checks whether the given calling convention supports variadic
diff --git a/clang/include/clang/InstallAPI/HeaderFile.h b/clang/include/clang/InstallAPI/HeaderFile.h
index 235b4da3add8..c67503d4ad49 100644
--- a/clang/include/clang/InstallAPI/HeaderFile.h
+++ b/clang/include/clang/InstallAPI/HeaderFile.h
@@ -24,8 +24,6 @@
namespace clang::installapi {
enum class HeaderType {
- /// Unset or unknown type.
- Unknown,
/// Represents declarations accessible to all clients.
Public,
/// Represents declarations accessible to a disclosed set of clients.
@@ -33,6 +31,8 @@ enum class HeaderType {
/// Represents declarations only accessible as implementation details to the
/// input library.
Project,
+ /// Unset or unknown type.
+ Unknown,
};
inline StringRef getName(const HeaderType T) {
@@ -62,6 +62,8 @@ class HeaderFile {
bool Excluded{false};
/// Add header file to processing.
bool Extra{false};
+ /// Specify that the header file is the umbrella header for the library.
+ bool Umbrella{false};
public:
HeaderFile() = delete;
@@ -79,17 +81,21 @@ public:
void setExtra(bool V = true) { Extra = V; }
void setExcluded(bool V = true) { Excluded = V; }
+ void setUmbrellaHeader(bool V = true) { Umbrella = V; }
bool isExtra() const { return Extra; }
bool isExcluded() const { return Excluded; }
+ bool isUmbrellaHeader() const { return Umbrella; }
bool useIncludeName() const {
return Type != HeaderType::Project && !IncludeName.empty();
}
bool operator==(const HeaderFile &Other) const {
- return std::tie(Type, FullPath, IncludeName, Language, Excluded, Extra) ==
- std::tie(Other.Type, Other.FullPath, Other.IncludeName,
- Other.Language, Other.Excluded, Other.Extra);
+ return std::tie(Type, FullPath, IncludeName, Language, Excluded, Extra,
+ Umbrella) == std::tie(Other.Type, Other.FullPath,
+ Other.IncludeName, Other.Language,
+ Other.Excluded, Other.Extra,
+ Other.Umbrella);
}
};
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 5ecd2f9eb288..3a1abd4c7892 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2234,7 +2234,8 @@ private:
bool CheckRISCVLMUL(CallExpr *TheCall, unsigned ArgNum);
bool CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
CallExpr *TheCall);
- void checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D);
+ void checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D,
+ const llvm::StringMap<bool> &FeatureMap);
bool CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI,
unsigned BuiltinID, CallExpr *TheCall);
bool CheckWebAssemblyBuiltinFunctionCall(const TargetInfo &TI,
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index bf46766d44b3..5fe5c9286dab 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -908,7 +908,7 @@ def PaddingChecker : Checker<"Padding">,
"24",
Released>
]>,
- Documentation<NotDocumented>;
+ Documentation<HasDocumentation>;
} // end: "padding"
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index f619d657ae9f..425f84e8af1f 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -3445,6 +3445,7 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) {
case CC_PreserveAll:
case CC_M68kRTD:
case CC_PreserveNone:
+ case CC_RISCVVectorCall:
// FIXME: we should be mangling all of the above.
return "";
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index d2ffb23845ac..8f3e26d46019 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -3484,6 +3484,9 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) {
case CC_PreserveAll: return "preserve_all";
case CC_M68kRTD: return "m68k_rtd";
case CC_PreserveNone: return "preserve_none";
+ // clang-format off
+ case CC_RISCVVectorCall: return "riscv_vector_cc";
+ // clang-format on
}
llvm_unreachable("Invalid calling convention.");
@@ -4074,6 +4077,7 @@ bool AttributedType::isCallingConv() const {
case attr::PreserveAll:
case attr::M68kRTD:
case attr::PreserveNone:
+ case attr::RISCVVectorCC:
return true;
}
llvm_unreachable("invalid attr kind");
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index f176d043d525..0aa1d9327d77 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -1071,6 +1071,9 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
case CC_PreserveNone:
OS << " __attribute__((preserve_none))";
break;
+ case CC_RISCVVectorCall:
+ OS << " __attribute__((riscv_vector_cc))";
+ break;
}
}
@@ -1960,6 +1963,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
case attr::PreserveNone:
OS << "preserve_none";
break;
+ case attr::RISCVVectorCC:
+ OS << "riscv_vector_cc";
+ break;
case attr::NoDeref:
OS << "noderef";
break;
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index a6d4af2b8811..f3d705e1551f 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -467,3 +467,14 @@ ParsedTargetAttr RISCVTargetInfo::parseTargetAttr(StringRef Features) const {
}
return Ret;
}
+
+TargetInfo::CallingConvCheckResult
+RISCVTargetInfo::checkCallingConvention(CallingConv CC) const {
+ switch (CC) {
+ default:
+ return CCCR_Warning;
+ case CC_C:
+ case CC_RISCVVectorCall:
+ return CCCR_OK;
+ }
+}
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index bfbdafb682c8..78580b5b1c10 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -110,6 +110,8 @@ public:
bool hasBFloat16Type() const override { return true; }
+ CallingConvCheckResult checkCallingConvention(CallingConv CC) const override;
+
bool useFP16ConversionIntrinsics() const override {
return false;
}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3cfdb261a0ea..fdb517eb254d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -792,7 +792,8 @@ EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
- return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
+ return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
+ ArgValue);
}
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
@@ -3018,7 +3019,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_va_copy: {
Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
- Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr});
+ Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
+ {DstPtr, SrcPtr});
return RValue::get(nullptr);
}
case Builtin::BIabs:
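(Editorial illustration, not part of the patch.) A minimal sketch of the
user-level code affected by this change: clang now requests
``llvm.va_start``/``llvm.va_end``/``llvm.va_copy`` with an explicit
pointer-type overload (e.g. ``llvm.va_start.p0``), so the ``va_list`` pointer's
address space is carried through on targets where it is non-zero. The function
name is hypothetical:

.. code-block:: cpp

   #include <cstdarg>

   int sum(int count, ...) {
     va_list args;
     va_start(args, count);   // EmitVAStartEnd now emits llvm.va_start.p<AS>(ptr %args)
     int total = 0;
     for (int i = 0; i < count; ++i)
       total += va_arg(args, int);
     va_list copy;
     va_copy(copy, args);     // __builtin_va_copy now emits llvm.va_copy.p<AS>
     va_end(copy);
     va_end(args);            // likewise llvm.va_end.p<AS>
     return total;
   }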
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 475d96b0e87d..b8adf5c26b3a 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -74,6 +74,9 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
case CC_SwiftAsync: return llvm::CallingConv::SwiftTail;
case CC_M68kRTD: return llvm::CallingConv::M68k_RTD;
case CC_PreserveNone: return llvm::CallingConv::PreserveNone;
+ // clang-format off
+ case CC_RISCVVectorCall: return llvm::CallingConv::RISCV_VectorCall;
+ // clang-format on
}
}
@@ -260,6 +263,9 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D,
if (D->hasAttr<PreserveNoneAttr>())
return CC_PreserveNone;
+ if (D->hasAttr<RISCVVectorCCAttr>())
+ return CC_RISCVVectorCall;
+
return CC_C;
}
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 0e20de2005b2..2a385d85aa2b 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -1452,6 +1452,8 @@ static unsigned getDwarfCC(CallingConv CC) {
return llvm::dwarf::DW_CC_LLVM_M68kRTD;
case CC_PreserveNone:
return llvm::dwarf::DW_CC_LLVM_PreserveNone;
+ case CC_RISCVVectorCall:
+ return llvm::dwarf::DW_CC_LLVM_RISCVVectorCall;
}
return 0;
}
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index b55f433a8be7..72393bea6205 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -2065,8 +2065,11 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) {
targetDiag(D->getLocation(), diag::note_defined_here, FD) << D;
}
- if (TI.hasRISCVVTypes() && Ty->isRVVSizelessBuiltinType())
- checkRVVTypeSupport(Ty, Loc, D);
+ if (TI.hasRISCVVTypes() && Ty->isRVVSizelessBuiltinType() && FD) {
+ llvm::StringMap<bool> CallerFeatureMap;
+ Context.getFunctionFeatureMap(CallerFeatureMap, FD);
+ checkRVVTypeSupport(Ty, Loc, D, CallerFeatureMap);
+ }
// Don't allow SVE types in functions without a SVE target.
if (Ty->isSVESizelessBuiltinType() && FD && FD->hasBody()) {
diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp
index 836c633e9d20..a3128306c664 100644
--- a/clang/lib/Sema/SemaAPINotes.cpp
+++ b/clang/lib/Sema/SemaAPINotes.cpp
@@ -52,49 +52,54 @@ static void applyNullability(Sema &S, Decl *D, NullabilityKind Nullability,
if (!Metadata.IsActive)
return;
- auto IsModified = [&](Decl *D, QualType QT,
- NullabilityKind Nullability) -> bool {
+ auto GetModified =
+ [&](Decl *D, QualType QT,
+ NullabilityKind Nullability) -> std::optional<QualType> {
QualType Original = QT;
S.CheckImplicitNullabilityTypeSpecifier(QT, Nullability, D->getLocation(),
isa<ParmVarDecl>(D),
/*OverrideExisting=*/true);
- return QT.getTypePtr() != Original.getTypePtr();
+ return (QT.getTypePtr() != Original.getTypePtr()) ? std::optional(QT)
+ : std::nullopt;
};
if (auto Function = dyn_cast<FunctionDecl>(D)) {
- if (IsModified(D, Function->getReturnType(), Nullability)) {
- QualType FnType = Function->getType();
- Function->setType(FnType);
+ if (auto Modified =
+ GetModified(D, Function->getReturnType(), Nullability)) {
+ const FunctionType *FnType = Function->getType()->castAs<FunctionType>();
+ if (const FunctionProtoType *proto = dyn_cast<FunctionProtoType>(FnType))
+ Function->setType(S.Context.getFunctionType(
+ *Modified, proto->getParamTypes(), proto->getExtProtoInfo()));
+ else
+ Function->setType(
+ S.Context.getFunctionNoProtoType(*Modified, FnType->getExtInfo()));
}
} else if (auto Method = dyn_cast<ObjCMethodDecl>(D)) {
- QualType Type = Method->getReturnType();
- if (IsModified(D, Type, Nullability)) {
- Method->setReturnType(Type);
+ if (auto Modified = GetModified(D, Method->getReturnType(), Nullability)) {
+ Method->setReturnType(*Modified);
// Make it a context-sensitive keyword if we can.
- if (!isIndirectPointerType(Type))
+ if (!isIndirectPointerType(*Modified))
Method->setObjCDeclQualifier(Decl::ObjCDeclQualifier(
Method->getObjCDeclQualifier() | Decl::OBJC_TQ_CSNullability));
}
} else if (auto Value = dyn_cast<ValueDecl>(D)) {
- QualType Type = Value->getType();
- if (IsModified(D, Type, Nullability)) {
- Value->setType(Type);
+ if (auto Modified = GetModified(D, Value->getType(), Nullability)) {
+ Value->setType(*Modified);
// Make it a context-sensitive keyword if we can.
if (auto Parm = dyn_cast<ParmVarDecl>(D)) {
- if (Parm->isObjCMethodParameter() && !isIndirectPointerType(Type))
+ if (Parm->isObjCMethodParameter() && !isIndirectPointerType(*Modified))
Parm->setObjCDeclQualifier(Decl::ObjCDeclQualifier(
Parm->getObjCDeclQualifier() | Decl::OBJC_TQ_CSNullability));
}
}
} else if (auto Property = dyn_cast<ObjCPropertyDecl>(D)) {
- QualType Type = Property->getType();
- if (IsModified(D, Type, Nullability)) {
- Property->setType(Type, Property->getTypeSourceInfo());
+ if (auto Modified = GetModified(D, Property->getType(), Nullability)) {
+ Property->setType(*Modified, Property->getTypeSourceInfo());
// Make it a property attribute if we can.
- if (!isIndirectPointerType(Type))
+ if (!isIndirectPointerType(*Modified))
Property->setPropertyAttributes(
ObjCPropertyAttribute::kind_null_resettable);
}
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 084495813309..447e73686b4f 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5760,57 +5760,6 @@ static bool CheckInvalidVLENandLMUL(const TargetInfo &TI, CallExpr *TheCall,
bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI,
unsigned BuiltinID,
CallExpr *TheCall) {
- // CodeGenFunction can also detect this, but this gives a better error
- // message.
- bool FeatureMissing = false;
- SmallVector<StringRef> ReqFeatures;
- StringRef Features = Context.BuiltinInfo.getRequiredFeatures(BuiltinID);
- Features.split(ReqFeatures, ',', -1, false);
-
- // Check if each required feature is included
- for (StringRef F : ReqFeatures) {
- SmallVector<StringRef> ReqOpFeatures;
- F.split(ReqOpFeatures, '|');
-
- if (llvm::none_of(ReqOpFeatures,
- [&TI](StringRef OF) { return TI.hasFeature(OF); })) {
- std::string FeatureStrs;
- bool IsExtension = true;
- for (StringRef OF : ReqOpFeatures) {
- // If the feature is 64bit, alter the string so it will print better in
- // the diagnostic.
- if (OF == "64bit") {
- assert(ReqOpFeatures.size() == 1 && "Expected '64bit' to be alone");
- OF = "RV64";
- IsExtension = false;
- }
- if (OF == "32bit") {
- assert(ReqOpFeatures.size() == 1 && "Expected '32bit' to be alone");
- OF = "RV32";
- IsExtension = false;
- }
-
- // Convert features like "zbr" and "experimental-zbr" to "Zbr".
- OF.consume_front("experimental-");
- std::string FeatureStr = OF.str();
- FeatureStr[0] = std::toupper(FeatureStr[0]);
- // Combine strings.
- FeatureStrs += FeatureStrs.empty() ? "" : ", ";
- FeatureStrs += "'";
- FeatureStrs += FeatureStr;
- FeatureStrs += "'";
- }
- // Error message
- FeatureMissing = true;
- Diag(TheCall->getBeginLoc(), diag::err_riscv_builtin_requires_extension)
- << IsExtension
- << TheCall->getSourceRange() << StringRef(FeatureStrs);
- }
- }
-
- if (FeatureMissing)
- return true;
-
// vmulh.vv, vmulh.vx, vmulhu.vv, vmulhu.vx, vmulhsu.vv, vmulhsu.vx,
// vsmul.vv, vsmul.vx are not included for EEW=64 in Zve64*.
switch (BuiltinID) {
@@ -6714,36 +6663,35 @@ bool Sema::CheckWebAssemblyBuiltinFunctionCall(const TargetInfo &TI,
return false;
}
-void Sema::checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D) {
- const TargetInfo &TI = Context.getTargetInfo();
-
+void Sema::checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D,
+ const llvm::StringMap<bool> &FeatureMap) {
ASTContext::BuiltinVectorTypeInfo Info =
Context.getBuiltinVectorTypeInfo(Ty->castAs<BuiltinType>());
unsigned EltSize = Context.getTypeSize(Info.ElementType);
unsigned MinElts = Info.EC.getKnownMinValue();
if (Info.ElementType->isSpecificBuiltinType(BuiltinType::Double) &&
- !TI.hasFeature("zve64d"))
+ !FeatureMap.lookup("zve64d"))
Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve64d";
// (ELEN, LMUL) pairs of (8, mf8), (16, mf4), (32, mf2), (64, m1) requires at
// least zve64x
else if (((EltSize == 64 && Info.ElementType->isIntegerType()) ||
MinElts == 1) &&
- !TI.hasFeature("zve64x"))
+ !FeatureMap.lookup("zve64x"))
Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve64x";
- else if (Info.ElementType->isFloat16Type() && !TI.hasFeature("zvfh") &&
- !TI.hasFeature("zvfhmin"))
+ else if (Info.ElementType->isFloat16Type() && !FeatureMap.lookup("zvfh") &&
+ !FeatureMap.lookup("zvfhmin"))
Diag(Loc, diag::err_riscv_type_requires_extension, D)
<< Ty << "zvfh or zvfhmin";
else if (Info.ElementType->isBFloat16Type() &&
- !TI.hasFeature("experimental-zvfbfmin"))
+ !FeatureMap.lookup("experimental-zvfbfmin"))
Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zvfbfmin";
else if (Info.ElementType->isSpecificBuiltinType(BuiltinType::Float) &&
- !TI.hasFeature("zve32f"))
+ !FeatureMap.lookup("zve32f"))
Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve32f";
// Given that caller already checked isRVVType() before calling this function,
// if we don't have at least zve32x supported, then we need to emit error.
- else if (!TI.hasFeature("zve32x"))
+ else if (!FeatureMap.lookup("zve32x"))
Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve32x";
}
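(Editorial illustration, not part of the patch.) A minimal sketch of what the
per-function feature map enables: an RVV type is now validated against the
features of the enclosing function rather than the global target, so it can be
used inside a function that enables the extension through a target attribute.
The function names are hypothetical, and both the exact
``target("arch=+zve64d")`` spelling and the availability of
``<riscv_vector.h>`` without a module-level vector extension are assumptions:

.. code-block:: cpp

   #include <riscv_vector.h>

   // Translation unit compiled with e.g. -march=rv64gc (no vector extension).

   __attribute__((target("arch=+zve64d")))
   void uses_vectors() {
     vfloat64m1_t v;   // OK: this function's feature map contains zve64d.
     (void)v;
   }

   void no_vectors() {
     // vfloat64m1_t v; // error: RVV type requires the 'zve64d' extension here.
   }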
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 1c546e9f5894..b6c4d3d540ef 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -1269,10 +1269,18 @@ substituteParameterMappings(Sema &S, NormalizedConstraint &N,
: SourceLocation()));
Atomic.ParameterMapping.emplace(TempArgs, OccurringIndices.count());
}
+ SourceLocation InstLocBegin =
+ ArgsAsWritten->arguments().empty()
+ ? ArgsAsWritten->getLAngleLoc()
+ : ArgsAsWritten->arguments().front().getSourceRange().getBegin();
+ SourceLocation InstLocEnd =
+ ArgsAsWritten->arguments().empty()
+ ? ArgsAsWritten->getRAngleLoc()
+ : ArgsAsWritten->arguments().front().getSourceRange().getEnd();
Sema::InstantiatingTemplate Inst(
- S, ArgsAsWritten->arguments().front().getSourceRange().getBegin(),
+ S, InstLocBegin,
Sema::InstantiatingTemplate::ParameterMappingSubstitution{}, Concept,
- ArgsAsWritten->arguments().front().getSourceRange());
+ {InstLocBegin, InstLocEnd});
if (S.SubstTemplateArguments(*Atomic.ParameterMapping, MLTAL, SubstArgs))
return true;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 66aad2592cb3..8b44d24f5273 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -8962,8 +8962,13 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
}
}
- if (T->isRVVSizelessBuiltinType())
- checkRVVTypeSupport(T, NewVD->getLocation(), cast<Decl>(CurContext));
+ if (T->isRVVSizelessBuiltinType() && isa<FunctionDecl>(CurContext)) {
+ const FunctionDecl *FD = cast<FunctionDecl>(CurContext);
+ llvm::StringMap<bool> CallerFeatureMap;
+ Context.getFunctionFeatureMap(CallerFeatureMap, FD);
+ checkRVVTypeSupport(T, NewVD->getLocation(), cast<Decl>(CurContext),
+ CallerFeatureMap);
+ }
}
/// Perform semantic checking on a newly-created variable
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 0a62c656d824..f25f3afd0f4a 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5271,6 +5271,9 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
case ParsedAttr::AT_PreserveNone:
D->addAttr(::new (S.Context) PreserveNoneAttr(S.Context, AL));
return;
+ case ParsedAttr::AT_RISCVVectorCC:
+ D->addAttr(::new (S.Context) RISCVVectorCCAttr(S.Context, AL));
+ return;
default:
llvm_unreachable("unexpected attribute kind");
}
@@ -5475,6 +5478,9 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
case ParsedAttr::AT_PreserveNone:
CC = CC_PreserveNone;
break;
+ case ParsedAttr::AT_RISCVVectorCC:
+ CC = CC_RISCVVectorCall;
+ break;
default: llvm_unreachable("unexpected attribute kind");
}
@@ -9637,6 +9643,7 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_AMDGPUKernelCall:
case ParsedAttr::AT_M68kRTD:
case ParsedAttr::AT_PreserveNone:
+ case ParsedAttr::AT_RISCVVectorCC:
handleCallConvAttr(S, D, AL);
break;
case ParsedAttr::AT_Suppress:
diff --git a/clang/lib/Sema/SemaObjCProperty.cpp b/clang/lib/Sema/SemaObjCProperty.cpp
index 4636d89ebf2b..f9e1ad0121e2 100644
--- a/clang/lib/Sema/SemaObjCProperty.cpp
+++ b/clang/lib/Sema/SemaObjCProperty.cpp
@@ -638,8 +638,6 @@ ObjCPropertyDecl *Sema::CreatePropertyDecl(Scope *S,
PDecl->setInvalidDecl();
}
- ProcessDeclAttributes(S, PDecl, FD.D);
-
// Regardless of setter/getter attribute, we save the default getter/setter
// selector names in anticipation of declaration of setter/getter methods.
PDecl->setGetterName(GetterSel, GetterNameLoc);
@@ -647,6 +645,8 @@ ObjCPropertyDecl *Sema::CreatePropertyDecl(Scope *S,
PDecl->setPropertyAttributesAsWritten(
makePropertyAttributesAsWritten(AttributesAsWritten));
+ ProcessDeclAttributes(S, PDecl, FD.D);
+
if (Attributes & ObjCPropertyAttribute::kind_readonly)
PDecl->setPropertyAttributes(ObjCPropertyAttribute::kind_readonly);
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index d7521a5363a3..fd94caa4e1d4 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -138,7 +138,8 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
case ParsedAttr::AT_PreserveMost: \
case ParsedAttr::AT_PreserveAll: \
case ParsedAttr::AT_M68kRTD: \
- case ParsedAttr::AT_PreserveNone
+ case ParsedAttr::AT_PreserveNone: \
+ case ParsedAttr::AT_RISCVVectorCC
// Function type attributes.
#define FUNCTION_TYPE_ATTRS_CASELIST \
@@ -7939,6 +7940,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
return createSimpleAttr<M68kRTDAttr>(Ctx, Attr);
case ParsedAttr::AT_PreserveNone:
return createSimpleAttr<PreserveNoneAttr>(Ctx, Attr);
+ case ParsedAttr::AT_RISCVVectorCC:
+ return createSimpleAttr<RISCVVectorCCAttr>(Ctx, Attr);
}
llvm_unreachable("unexpected attribute kind!");
}
diff --git a/clang/test/APINotes/Inputs/APINotes/SomeOtherKit.apinotes b/clang/test/APINotes/Inputs/APINotes/SomeOtherKit.apinotes
new file mode 100644
index 000000000000..ccdc4e15d34d
--- /dev/null
+++ b/clang/test/APINotes/Inputs/APINotes/SomeOtherKit.apinotes
@@ -0,0 +1,8 @@
+Name: SomeOtherKit
+Classes:
+ - Name: A
+ Methods:
+ - Selector: "methodB"
+ MethodKind: Instance
+ Availability: none
+ AvailabilityMsg: "anything but this"
diff --git a/clang/test/APINotes/Inputs/BrokenHeaders/APINotes.apinotes b/clang/test/APINotes/Inputs/BrokenHeaders/APINotes.apinotes
new file mode 100644
index 000000000000..cd5475b13423
--- /dev/null
+++ b/clang/test/APINotes/Inputs/BrokenHeaders/APINotes.apinotes
@@ -0,0 +1,5 @@
+Name: SomeBrokenLib
+Functions:
+ - Name: do_something_with_pointers
+ Nu llabilityOfRet: O
+ # the space is intentional, to make sure we don't crash on malformed API Notes
diff --git a/clang/test/APINotes/Inputs/BrokenHeaders/SomeBrokenLib.h b/clang/test/APINotes/Inputs/BrokenHeaders/SomeBrokenLib.h
new file mode 100644
index 000000000000..b09c6f63eae0
--- /dev/null
+++ b/clang/test/APINotes/Inputs/BrokenHeaders/SomeBrokenLib.h
@@ -0,0 +1,6 @@
+#ifndef SOME_BROKEN_LIB_H
+#define SOME_BROKEN_LIB_H
+
+void do_something_with_pointers(int *ptr1, int *ptr2);
+
+#endif // SOME_BROKEN_LIB_H
diff --git a/clang/test/APINotes/Inputs/BrokenHeaders2/APINotes.apinotes b/clang/test/APINotes/Inputs/BrokenHeaders2/APINotes.apinotes
new file mode 100644
index 000000000000..33eeaaada999
--- /dev/null
+++ b/clang/test/APINotes/Inputs/BrokenHeaders2/APINotes.apinotes
@@ -0,0 +1,7 @@
+Name: SomeBrokenLib
+Functions:
+ - Name: do_something_with_pointers
+ NullabilityOfRet: O
+ - Name: do_something_with_pointers
+ NullabilityOfRet: O
+
diff --git a/clang/test/APINotes/Inputs/BrokenHeaders2/SomeBrokenLib.h b/clang/test/APINotes/Inputs/BrokenHeaders2/SomeBrokenLib.h
new file mode 100644
index 000000000000..b09c6f63eae0
--- /dev/null
+++ b/clang/test/APINotes/Inputs/BrokenHeaders2/SomeBrokenLib.h
@@ -0,0 +1,6 @@
+#ifndef SOME_BROKEN_LIB_H
+#define SOME_BROKEN_LIB_H
+
+void do_something_with_pointers(int *ptr1, int *ptr2);
+
+#endif // SOME_BROKEN_LIB_H
diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Headers/FrameworkWithActualPrivateModule.h b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Headers/FrameworkWithActualPrivateModule.h
new file mode 100644
index 000000000000..523de4f7ce08
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Headers/FrameworkWithActualPrivateModule.h
@@ -0,0 +1 @@
+extern int FrameworkWithActualPrivateModule;
diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.modulemap
new file mode 100644
index 000000000000..859d723716be
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.modulemap
@@ -0,0 +1,5 @@
+framework module FrameworkWithActualPrivateModule {
+ umbrella header "FrameworkWithActualPrivateModule.h"
+ export *
+ module * { export * }
+}
diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.private.modulemap b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.private.modulemap
new file mode 100644
index 000000000000..e7fafe3bcbb1
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/Modules/module.private.modulemap
@@ -0,0 +1,5 @@
+framework module FrameworkWithActualPrivateModule_Private {
+ umbrella header "FrameworkWithActualPrivateModule_Private.h"
+ export *
+ module * { export * }
+}
diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.apinotes b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.apinotes
new file mode 100644
index 000000000000..831cf1e93d35
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.apinotes
@@ -0,0 +1 @@
+Name: FrameworkWithActualPrivateModule_Private
diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.h b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.h
new file mode 100644
index 000000000000..c07a3e95d740
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithActualPrivateModule.framework/PrivateHeaders/FrameworkWithActualPrivateModule_Private.h
@@ -0,0 +1,2 @@
+#include <FrameworkWithActualPrivateModule/FrameworkWithActualPrivateModule.h>
+extern int FrameworkWithActualPrivateModule_Private;
diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Headers/FrameworkWithWrongCase.h b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Headers/FrameworkWithWrongCase.h
new file mode 100644
index 000000000000..4f3b631c27e3
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Headers/FrameworkWithWrongCase.h
@@ -0,0 +1 @@
+extern int FrameworkWithWrongCase;
diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Modules/module.modulemap
new file mode 100644
index 000000000000..e97d361039a1
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/Modules/module.modulemap
@@ -0,0 +1,5 @@
+framework module FrameworkWithWrongCase {
+ umbrella header "FrameworkWithWrongCase.h"
+ export *
+ module * { export * }
+}
diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/PrivateHeaders/FrameworkWithWrongCase_Private.apinotes b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/PrivateHeaders/FrameworkWithWrongCase_Private.apinotes
new file mode 100644
index 000000000000..ae5447c61e33
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCase.framework/PrivateHeaders/FrameworkWithWrongCase_Private.apinotes
@@ -0,0 +1 @@
+Name: FrameworkWithWrongCase
diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Headers/FrameworkWithWrongCasePrivate.h b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Headers/FrameworkWithWrongCasePrivate.h
new file mode 100644
index 000000000000..d3d61483191c
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Headers/FrameworkWithWrongCasePrivate.h
@@ -0,0 +1 @@
+extern int FrameworkWithWrongCasePrivate;
diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.modulemap
new file mode 100644
index 000000000000..04b96adbbfeb
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.modulemap
@@ -0,0 +1,5 @@
+framework module FrameworkWithWrongCasePrivate {
+ umbrella header "FrameworkWithWrongCasePrivate.h"
+ export *
+ module * { export * }
+}
diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.private.modulemap b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.private.modulemap
new file mode 100644
index 000000000000..d6ad53cdc717
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/Modules/module.private.modulemap
@@ -0,0 +1 @@
+module FrameworkWithWrongCasePrivate.Inner {}
diff --git a/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/PrivateHeaders/FrameworkWithWrongCasePrivate_Private.apinotes b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/PrivateHeaders/FrameworkWithWrongCasePrivate_Private.apinotes
new file mode 100644
index 000000000000..d7af293e8125
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/FrameworkWithWrongCasePrivate.framework/PrivateHeaders/FrameworkWithWrongCasePrivate_Private.apinotes
@@ -0,0 +1 @@
+Name: FrameworkWithWrongCasePrivate
diff --git a/clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Headers/LayeredKit.h b/clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Headers/LayeredKit.h
new file mode 100644
index 000000000000..a95d19ecbe9a
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Headers/LayeredKit.h
@@ -0,0 +1,11 @@
+@import LayeredKitImpl;
+
+// @interface declarations already don't inherit attributes from forward
+// declarations, so in order to test this properly we have to /not/ define
+// UpwardClass anywhere.
+
+// @interface UpwardClass
+// @end
+
+@protocol UpwardProto
+@end
diff --git a/clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Modules/module.modulemap
new file mode 100644
index 000000000000..04bbe72a2b6e
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/LayeredKit.framework/Modules/module.modulemap
@@ -0,0 +1,5 @@
+framework module LayeredKit {
+ umbrella header "LayeredKit.h"
+ export *
+ module * { export * }
+}
diff --git a/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.apinotes b/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.apinotes
new file mode 100644
index 000000000000..bece28cfe605
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.apinotes
@@ -0,0 +1,9 @@
+Name: LayeredKitImpl
+Classes:
+- Name: PerfectlyNormalClass
+ Availability: none
+- Name: UpwardClass
+ Availability: none
+Protocols:
+- Name: UpwardProto
+ Availability: none
diff --git a/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.h b/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.h
new file mode 100644
index 000000000000..99591d35803a
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Headers/LayeredKitImpl.h
@@ -0,0 +1,7 @@
+@protocol UpwardProto;
+@class UpwardClass;
+
+@interface PerfectlyNormalClass
+@end
+
+void doImplementationThings(UpwardClass *first, id <UpwardProto> second) __attribute((unavailable));
diff --git a/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Modules/module.modulemap
new file mode 100644
index 000000000000..58a6e55c1067
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/LayeredKitImpl.framework/Modules/module.modulemap
@@ -0,0 +1,5 @@
+framework module LayeredKitImpl {
+ umbrella header "LayeredKitImpl.h"
+ export *
+ module * { export * }
+}
diff --git a/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Modules/module.modulemap
new file mode 100644
index 000000000000..2d07e76c0a14
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Modules/module.modulemap
@@ -0,0 +1,5 @@
+framework module SimpleKit {
+ umbrella header "SimpleKit.h"
+ export *
+ module * { export * }
+}
diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit.apinotes b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit.apinotes
new file mode 100644
index 000000000000..817af123fc77
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit.apinotes
@@ -0,0 +1,74 @@
+Name: SomeKit
+Classes:
+ - Name: A
+ Methods:
+ - Selector: "transform:"
+ MethodKind: Instance
+ Availability: none
+ AvailabilityMsg: "anything but this"
+ - Selector: "transform:integer:"
+ MethodKind: Instance
+ NullabilityOfRet: N
+ Nullability: [ N, S ]
+ Properties:
+ - Name: intValue
+ PropertyKind: Instance
+ Availability: none
+ AvailabilityMsg: "wouldn't work anyway"
+ - Name: nonnullAInstance
+ PropertyKind: Instance
+ Nullability: N
+ - Name: nonnullAClass
+ PropertyKind: Class
+ Nullability: N
+ - Name: nonnullABoth
+ Nullability: N
+ - Name: B
+ Availability: none
+ AvailabilityMsg: "just don't"
+ - Name: C
+ Methods:
+ - Selector: "initWithA:"
+ MethodKind: Instance
+ DesignatedInit: true
+ - Name: OverriddenTypes
+ Methods:
+ - Selector: "methodToMangle:second:"
+ MethodKind: Instance
+ ResultType: 'char *'
+ Parameters:
+ - Position: 0
+ Type: 'SOMEKIT_DOUBLE *'
+ - Position: 1
+ Type: 'float *'
+ Properties:
+ - Name: intPropertyToMangle
+ PropertyKind: Instance
+ Type: 'double *'
+Functions:
+ - Name: global_int_fun
+ ResultType: 'char *'
+ Parameters:
+ - Position: 0
+ Type: 'double *'
+ - Position: 1
+ Type: 'float *'
+Globals:
+ - Name: global_int_ptr
+ Type: 'double *'
+SwiftVersions:
+ - Version: 3.0
+ Classes:
+ - Name: A
+ Methods:
+ - Selector: "transform:integer:"
+ MethodKind: Instance
+ NullabilityOfRet: O
+ Nullability: [ O, S ]
+ Properties:
+ - Name: explicitNonnullInstance
+ PropertyKind: Instance
+ Nullability: O
+ - Name: explicitNullableInstance
+ PropertyKind: Instance
+ Nullability: N
diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit_private.apinotes b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit_private.apinotes
new file mode 100644
index 000000000000..28ede9dfa25c
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/APINotes/SomeKit_private.apinotes
@@ -0,0 +1,15 @@
+Name: SomeKit
+Classes:
+ - Name: A
+ Methods:
+ - Selector: "privateTransform:input:"
+ MethodKind: Instance
+ NullabilityOfRet: N
+ Nullability: [ N, S ]
+ Properties:
+ - Name: internalProperty
+ Nullability: N
+Protocols:
+ - Name: InternalProtocol
+ Availability: none
+ AvailabilityMsg: "not for you"
diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKitForNullAnnotation.h b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKitForNullAnnotation.h
new file mode 100644
index 000000000000..bc0c5da8848e
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Headers/SomeKitForNullAnnotation.h
@@ -0,0 +1,55 @@
+#ifndef SOMEKIT_H
+#define SOMEKIT_H
+
+#define ROOT_CLASS __attribute__((objc_root_class))
+
+ROOT_CLASS
+@interface A
+-(A*)transform:(A*)input;
+-(A*)transform:(A*)input integer:(int)integer;
+
+@property (nonatomic, readonly, retain) A* someA;
+@property (nonatomic, retain) A* someOtherA;
+
+@property (nonatomic) int intValue;
+@end
+
+@interface B : A
+@end
+
+@interface C : A
+- (instancetype)init;
+- (instancetype)initWithA:(A*)a;
+@end
+
+
+@interface MyClass : A
+- Inst;
++ Clas;
+@end
+
+struct CGRect {
+ float origin;
+ float size;
+};
+typedef struct CGRect NSRect;
+
+@interface I
+- (void) Meth : (NSRect[4])exposedRects;
+- (void) Meth1 : (const I*)exposedRects;
+- (void) Meth2 : (const I*)exposedRects;
+- (void) Meth3 : (I*)exposedRects;
+- (const I*) Meth4;
+- (const I*) Meth5 : (int) Arg1 : (const I*)Arg2 : (double)Arg3 : (const I*) Arg4 :(const volatile id) Arg5;
+- (volatile const I*) Meth6 : (const char *)Arg1 : (const char *)Arg2 : (double)Arg3 : (const I*) Arg4 :(const volatile id) Arg5;
+@end
+
+@class NSURL, NSArray, NSError;
+@interface INTF_BLOCKS
+ + (void)getNonLocalVersionsOfItemAtURL:(NSURL *)url completionHandler:(void (^)(NSArray *nonLocalFileVersions, NSError *error))completionHandler;
+ + (void *)getNonLocalVersionsOfItemAtURL2:(NSURL *)url completionHandler:(void (^)(NSArray *nonLocalFileVersions, NSError *error))completionHandler;
+ + (NSError **)getNonLocalVersionsOfItemAtURL3:(int)url completionHandler:(void (^)(NSArray *nonLocalFileVersions, NSError *error))completionHandler;
+ + (id)getNonLocalVersionsOfItemAtURL4:(NSURL *)url completionHandler:(void (^)(int nonLocalFileVersions, NSError *error, NSURL*))completionHandler;
+@end
+
+#endif
diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.modulemap
new file mode 100644
index 000000000000..3abee2df0be1
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.modulemap
@@ -0,0 +1,5 @@
+framework module SomeKit {
+ umbrella header "SomeKit.h"
+ export *
+ module * { export * }
+}
diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.private.modulemap b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.private.modulemap
new file mode 100644
index 000000000000..bbda9d08e399
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module.private.modulemap
@@ -0,0 +1,8 @@
+module SomeKit.Private {
+ header "SomeKit_Private.h"
+ export *
+
+ explicit module NullAnnotation {
+ header "SomeKit_PrivateForNullAnnotation.h"
+ }
+}
diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module_private.modulemap b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module_private.modulemap
new file mode 100644
index 000000000000..e31034317cb8
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/Modules/module_private.modulemap
@@ -0,0 +1,8 @@
+explicit framework module SomeKit.Private {
+ header "SomeKit_Private.h"
+ explicit NullAnnotation { header "SomeKit_PrivateForNullAnnotation.h" }
+ export *
+ module * { export * }
+syntax error
+
+}
diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_Private.h b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_Private.h
new file mode 100644
index 000000000000..c7611123e4ad
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_Private.h
@@ -0,0 +1,16 @@
+#ifndef SOMEKIT_PRIVATE_H
+#define SOMEKIT_PRIVATE_H
+
+#import <SomeKit/SomeKit.h>
+
+@interface A(Private)
+-(A*)privateTransform:(A*)input;
+
+@property (nonatomic) A* internalProperty;
+@end
+
+@protocol InternalProtocol
+@end
+
+#endif
+
diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_PrivateForNullAnnotation.h b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_PrivateForNullAnnotation.h
new file mode 100644
index 000000000000..bae4456b4080
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_PrivateForNullAnnotation.h
@@ -0,0 +1,17 @@
+#ifndef SOMEKIT_PRIVATE_H
+#define SOMEKIT_PRIVATE_H
+
+#import <SomeKit/SomeKitForNullAnnotation.h>
+
+@interface A(Private)
+-(A*)privateTransform:(A*)input;
+
+@property (nonatomic) A* internalProperty;
+@end
+
+@protocol InternalProtocol
+- (id) MomeMethod;
+@end
+
+#endif
+
diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_private.apinotes b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_private.apinotes
new file mode 100644
index 000000000000..28ede9dfa25c
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SomeKit.framework/PrivateHeaders/SomeKit_private.apinotes
@@ -0,0 +1,15 @@
+Name: SomeKit
+Classes:
+ - Name: A
+ Methods:
+ - Selector: "privateTransform:input:"
+ MethodKind: Instance
+ NullabilityOfRet: N
+ Nullability: [ N, S ]
+ Properties:
+ - Name: internalProperty
+ Nullability: N
+Protocols:
+ - Name: InternalProtocol
+ Availability: none
+ AvailabilityMsg: "not for you"
diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/APINotes/SomeOtherKit.apinotes b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/APINotes/SomeOtherKit.apinotes
new file mode 100644
index 000000000000..2ad546b8f8bc
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/APINotes/SomeOtherKit.apinotes
@@ -0,0 +1,8 @@
+Name: SomeOtherKit
+Classes:
+ - Name: A
+ Methods:
+ - Selector: "methodA"
+ MethodKind: Instance
+ Availability: none
+ AvailabilityMsg: "anything but this"
diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.apinotes b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.apinotes
new file mode 100644
index 000000000000..2ad546b8f8bc
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.apinotes
@@ -0,0 +1,8 @@
+Name: SomeOtherKit
+Classes:
+ - Name: A
+ Methods:
+ - Selector: "methodA"
+ MethodKind: Instance
+ Availability: none
+ AvailabilityMsg: "anything but this"
diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.h b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.h
new file mode 100644
index 000000000000..3911d765230c
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.h
@@ -0,0 +1,9 @@
+#ifndef SOME_OTHER_KIT_H
+
+__attribute__((objc_root_class))
+@interface A
+-(void)methodA;
+-(void)methodB;
+@end
+
+#endif
diff --git a/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Modules/module.modulemap
new file mode 100644
index 000000000000..0aaad92e041c
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SomeOtherKit.framework/Modules/module.modulemap
@@ -0,0 +1,5 @@
+framework module SomeOtherKit {
+ umbrella header "SomeOtherKit.h"
+ export *
+ module * { export * }
+}
diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit.h b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit.h
new file mode 100644
index 000000000000..d3376f1dac5d
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit.h
@@ -0,0 +1 @@
+extern int TopLevelPrivateKit_Public;
diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit_Private.apinotes b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit_Private.apinotes
new file mode 100644
index 000000000000..ece1dd220adf
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Headers/TopLevelPrivateKit_Private.apinotes
@@ -0,0 +1 @@
+garbage here because this file shouldn't get read
\ No newline at end of file
diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.modulemap
new file mode 100644
index 000000000000..70faa54e8347
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.modulemap
@@ -0,0 +1,5 @@
+framework module TopLevelPrivateKit {
+ umbrella header "TopLevelPrivateKit.h"
+ export *
+ module * { export * }
+}
diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.private.modulemap b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.private.modulemap
new file mode 100644
index 000000000000..0958a14d6710
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/Modules/module.private.modulemap
@@ -0,0 +1,5 @@
+framework module TopLevelPrivateKit_Private {
+ umbrella header "TopLevelPrivateKit_Private.h"
+ export *
+ module * { export * }
+}
diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit.apinotes b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit.apinotes
new file mode 100644
index 000000000000..908dae0e3b0b
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit.apinotes
@@ -0,0 +1 @@
+garbage here because this file shouldn't get read
diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.apinotes b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.apinotes
new file mode 100644
index 000000000000..43323621588b
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.apinotes
@@ -0,0 +1,4 @@
+Name: TopLevelPrivateKit_Private
+Globals:
+- Name: TopLevelPrivateKit_Private
+ Type: float
diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.h b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.h
new file mode 100644
index 000000000000..39cbfe6e9918
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private.h
@@ -0,0 +1 @@
+extern int TopLevelPrivateKit_Private;
diff --git a/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private_private.apinotes b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private_private.apinotes
new file mode 100644
index 000000000000..ece1dd220adf
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/TopLevelPrivateKit.framework/PrivateHeaders/TopLevelPrivateKit_Private_private.apinotes
@@ -0,0 +1 @@
+garbage here because this file shouldn't get read
\ No newline at end of file
diff --git a/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.apinotes b/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.apinotes
new file mode 100644
index 000000000000..572c714b3d61
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.apinotes
@@ -0,0 +1,156 @@
+Name: VersionedKit
+Classes:
+ - Name: TestProperties
+ SwiftObjCMembers: true
+ Properties:
+ - Name: accessorsOnly
+ PropertyKind: Instance
+ SwiftImportAsAccessors: true
+ - Name: accessorsOnlyForClass
+ PropertyKind: Class
+ SwiftImportAsAccessors: true
+ - Name: accessorsOnlyExceptInVersion3
+ PropertyKind: Instance
+ SwiftImportAsAccessors: true
+ - Name: accessorsOnlyForClassExceptInVersion3
+ PropertyKind: Class
+ SwiftImportAsAccessors: true
+Functions:
+ - Name: unversionedRenameDUMP
+ SwiftName: 'unversionedRename_NOTES()'
+Tags:
+ - Name: APINotedFlagEnum
+ FlagEnum: true
+ - Name: APINotedOpenEnum
+ EnumExtensibility: open
+ - Name: APINotedClosedEnum
+ EnumExtensibility: closed
+ - Name: SoonToBeCFEnum
+ EnumKind: CFEnum
+ - Name: SoonToBeNSEnum
+ EnumKind: NSEnum
+ - Name: SoonToBeCFOptions
+ EnumKind: CFOptions
+ - Name: SoonToBeNSOptions
+ EnumKind: NSOptions
+ - Name: SoonToBeCFClosedEnum
+ EnumKind: CFClosedEnum
+ - Name: SoonToBeNSClosedEnum
+ EnumKind: NSClosedEnum
+ - Name: UndoAllThatHasBeenDoneToMe
+ EnumKind: none
+Typedefs:
+ - Name: MultiVersionedTypedef34Notes
+ SwiftName: MultiVersionedTypedef34Notes_NEW
+ - Name: MultiVersionedTypedef345Notes
+ SwiftName: MultiVersionedTypedef345Notes_NEW
+ - Name: MultiVersionedTypedef4Notes
+ SwiftName: MultiVersionedTypedef4Notes_NEW
+ - Name: MultiVersionedTypedef45Notes
+ SwiftName: MultiVersionedTypedef45Notes_NEW
+SwiftVersions:
+ - Version: 3.0
+ Classes:
+ - Name: MyReferenceType
+ SwiftBridge: ''
+ - Name: TestGenericDUMP
+ SwiftImportAsNonGeneric: true
+ - Name: TestProperties
+ SwiftObjCMembers: false
+ Properties:
+ - Name: accessorsOnlyInVersion3
+ PropertyKind: Instance
+ SwiftImportAsAccessors: true
+ - Name: accessorsOnlyForClassInVersion3
+ PropertyKind: Class
+ SwiftImportAsAccessors: true
+ - Name: accessorsOnlyExceptInVersion3
+ PropertyKind: Instance
+ SwiftImportAsAccessors: false
+ - Name: accessorsOnlyForClassExceptInVersion3
+ PropertyKind: Class
+ SwiftImportAsAccessors: false
+ - Name: Swift3RenamedOnlyDUMP
+ SwiftName: SpecialSwift3Name
+ - Name: Swift3RenamedAlsoDUMP
+ SwiftName: SpecialSwift3Also
+ Functions:
+ - Name: moveToPointDUMP
+ SwiftName: 'moveTo(a:b:)'
+ - Name: acceptClosure
+ Parameters:
+ - Position: 0
+ NoEscape: false
+ - Name: privateFunc
+ SwiftPrivate: false
+ Tags:
+ - Name: MyErrorCode
+ NSErrorDomain: ''
+ - Name: NewlyFlagEnum
+ FlagEnum: false
+ - Name: OpenToClosedEnum
+ EnumExtensibility: open
+ - Name: ClosedToOpenEnum
+ EnumExtensibility: closed
+ - Name: NewlyClosedEnum
+ EnumExtensibility: none
+ - Name: NewlyOpenEnum
+ EnumExtensibility: none
+ Typedefs:
+ - Name: MyDoubleWrapper
+ SwiftWrapper: none
+ - Name: MultiVersionedTypedef34
+ SwiftName: MultiVersionedTypedef34_3
+ - Name: MultiVersionedTypedef34Header
+ SwiftName: MultiVersionedTypedef34Header_3
+ - Name: MultiVersionedTypedef34Notes
+ SwiftName: MultiVersionedTypedef34Notes_3
+ - Name: MultiVersionedTypedef345
+ SwiftName: MultiVersionedTypedef345_3
+ - Name: MultiVersionedTypedef345Header
+ SwiftName: MultiVersionedTypedef345Header_3
+ - Name: MultiVersionedTypedef345Notes
+ SwiftName: MultiVersionedTypedef345Notes_3
+ - Version: 5
+ Typedefs:
+ - Name: MultiVersionedTypedef345
+ SwiftName: MultiVersionedTypedef345_5
+ - Name: MultiVersionedTypedef345Header
+ SwiftName: MultiVersionedTypedef345Header_5
+ - Name: MultiVersionedTypedef345Notes
+ SwiftName: MultiVersionedTypedef345Notes_5
+ - Name: MultiVersionedTypedef45
+ SwiftName: MultiVersionedTypedef45_5
+ - Name: MultiVersionedTypedef45Header
+ SwiftName: MultiVersionedTypedef45Header_5
+ - Name: MultiVersionedTypedef45Notes
+ SwiftName: MultiVersionedTypedef45Notes_5
+ - Version: 4 # Versions are deliberately ordered as "3, 5, 4" to catch bugs.
+ Classes:
+ - Name: Swift4RenamedDUMP
+ SwiftName: SpecialSwift4Name
+ Typedefs:
+ - Name: MultiVersionedTypedef34
+ SwiftName: MultiVersionedTypedef34_4
+ - Name: MultiVersionedTypedef34Header
+ SwiftName: MultiVersionedTypedef34Header_4
+ - Name: MultiVersionedTypedef34Notes
+ SwiftName: MultiVersionedTypedef34Notes_4
+ - Name: MultiVersionedTypedef345
+ SwiftName: MultiVersionedTypedef345_4
+ - Name: MultiVersionedTypedef345Header
+ SwiftName: MultiVersionedTypedef345Header_4
+ - Name: MultiVersionedTypedef345Notes
+ SwiftName: MultiVersionedTypedef345Notes_4
+ - Name: MultiVersionedTypedef4
+ SwiftName: MultiVersionedTypedef4_4
+ - Name: MultiVersionedTypedef4Header
+ SwiftName: MultiVersionedTypedef4Header_4
+ - Name: MultiVersionedTypedef4Notes
+ SwiftName: MultiVersionedTypedef4Notes_4
+ - Name: MultiVersionedTypedef45
+ SwiftName: MultiVersionedTypedef45_4
+ - Name: MultiVersionedTypedef45Header
+ SwiftName: MultiVersionedTypedef45Header_4
+ - Name: MultiVersionedTypedef45Notes
+ SwiftName: MultiVersionedTypedef45Notes_4
diff --git a/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.h b/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.h
new file mode 100644
index 000000000000..9ce95633c523
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Headers/VersionedKit.h
@@ -0,0 +1,137 @@
+void moveToPointDUMP(double x, double y) __attribute__((swift_name("moveTo(x:y:)")));
+
+void unversionedRenameDUMP(void) __attribute__((swift_name("unversionedRename_HEADER()")));
+
+void acceptClosure(void (^ __attribute__((noescape)) block)(void));
+
+void privateFunc(void) __attribute__((swift_private));
+
+typedef double MyDoubleWrapper __attribute__((swift_wrapper(struct)));
+
+#if __OBJC__
+@class NSString;
+
+extern NSString *MyErrorDomain;
+
+enum __attribute__((ns_error_domain(MyErrorDomain))) MyErrorCode {
+ MyErrorCodeFailed = 1
+};
+
+__attribute__((swift_bridge("MyValueType")))
+@interface MyReferenceType
+@end
+
+@interface TestProperties
+@property (nonatomic, readwrite, retain) id accessorsOnly;
+@property (nonatomic, readwrite, retain, class) id accessorsOnlyForClass;
+
+@property (nonatomic, readwrite, retain) id accessorsOnlyInVersion3;
+@property (nonatomic, readwrite, retain, class) id accessorsOnlyForClassInVersion3;
+
+@property (nonatomic, readwrite, retain) id accessorsOnlyExceptInVersion3;
+@property (nonatomic, readwrite, retain, class) id accessorsOnlyForClassExceptInVersion3;
+@end
+
+@interface Base
+@end
+
+@interface TestGenericDUMP<Element> : Base
+- (Element)element;
+@end
+
+@interface Swift3RenamedOnlyDUMP
+@end
+
+__attribute__((swift_name("Swift4Name")))
+@interface Swift3RenamedAlsoDUMP
+@end
+
+@interface Swift4RenamedDUMP
+@end
+
+#endif
+
+
+enum __attribute__((flag_enum)) FlagEnum {
+ FlagEnumA = 1,
+ FlagEnumB = 2
+};
+
+enum __attribute__((flag_enum)) NewlyFlagEnum {
+ NewlyFlagEnumA = 1,
+ NewlyFlagEnumB = 2
+};
+
+enum APINotedFlagEnum {
+ APINotedFlagEnumA = 1,
+ APINotedFlagEnumB = 2
+};
+
+
+enum __attribute__((enum_extensibility(open))) OpenEnum {
+ OpenEnumA = 1,
+};
+
+enum __attribute__((enum_extensibility(open))) NewlyOpenEnum {
+ NewlyOpenEnumA = 1,
+};
+
+enum __attribute__((enum_extensibility(closed))) NewlyClosedEnum {
+ NewlyClosedEnumA = 1,
+};
+
+enum __attribute__((enum_extensibility(open))) ClosedToOpenEnum {
+ ClosedToOpenEnumA = 1,
+};
+
+enum __attribute__((enum_extensibility(closed))) OpenToClosedEnum {
+ OpenToClosedEnumA = 1,
+};
+
+enum APINotedOpenEnum {
+ APINotedOpenEnumA = 1,
+};
+
+enum APINotedClosedEnum {
+ APINotedClosedEnumA = 1,
+};
+
+
+enum SoonToBeCFEnum {
+ SoonToBeCFEnumA = 1
+};
+enum SoonToBeNSEnum {
+ SoonToBeNSEnumA = 1
+};
+enum SoonToBeCFOptions {
+ SoonToBeCFOptionsA = 1
+};
+enum SoonToBeNSOptions {
+ SoonToBeNSOptionsA = 1
+};
+enum SoonToBeCFClosedEnum {
+ SoonToBeCFClosedEnumA = 1
+};
+enum SoonToBeNSClosedEnum {
+ SoonToBeNSClosedEnumA = 1
+};
+enum UndoAllThatHasBeenDoneToMe {
+ UndoAllThatHasBeenDoneToMeA = 1
+} __attribute__((flag_enum)) __attribute__((enum_extensibility(closed)));
+
+
+typedef int MultiVersionedTypedef4;
+typedef int MultiVersionedTypedef4Notes;
+typedef int MultiVersionedTypedef4Header __attribute__((swift_name("MultiVersionedTypedef4Header_NEW")));
+
+typedef int MultiVersionedTypedef34;
+typedef int MultiVersionedTypedef34Notes;
+typedef int MultiVersionedTypedef34Header __attribute__((swift_name("MultiVersionedTypedef34Header_NEW")));
+
+typedef int MultiVersionedTypedef45;
+typedef int MultiVersionedTypedef45Notes;
+typedef int MultiVersionedTypedef45Header __attribute__((swift_name("MultiVersionedTypedef45Header_NEW")));
+
+typedef int MultiVersionedTypedef345;
+typedef int MultiVersionedTypedef345Notes;
+typedef int MultiVersionedTypedef345Header __attribute__((swift_name("MultiVersionedTypedef345Header_NEW")));
diff --git a/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Modules/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Modules/module.modulemap
new file mode 100644
index 000000000000..6d957fd68009
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/VersionedKit.framework/Modules/module.modulemap
@@ -0,0 +1,5 @@
+framework module VersionedKit {
+ umbrella header "VersionedKit.h"
+ export *
+ module * { export * }
+}
diff --git a/clang/test/APINotes/Inputs/Headers/APINotes.apinotes b/clang/test/APINotes/Inputs/Headers/APINotes.apinotes
new file mode 100644
index 000000000000..08210fc70565
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/APINotes.apinotes
@@ -0,0 +1,18 @@
+Name: HeaderLib
+SwiftInferImportAsMember: true
+Functions:
+ - Name: custom_realloc
+ NullabilityOfRet: N
+ Nullability: [ N, S ]
+ - Name: unavailable_function
+ Availability: none
+ AvailabilityMsg: "I beg you not to use this"
+ - Name: do_something_with_pointers
+ NullabilityOfRet: O
+ Nullability: [ N, O ]
+
+Globals:
+ - Name: global_int
+ Nullability: N
+ - Name: unavailable_global_int
+ Availability: none
diff --git a/clang/test/APINotes/Inputs/Headers/BrokenTypes.apinotes b/clang/test/APINotes/Inputs/Headers/BrokenTypes.apinotes
new file mode 100644
index 000000000000..00f7b5074e98
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/BrokenTypes.apinotes
@@ -0,0 +1,10 @@
+Name: BrokenTypes
+Functions:
+ - Name: break_me_function
+ ResultType: 'int * with extra junk'
+ Parameters:
+ - Position: 0
+ Type: 'not_a_type'
+Globals:
+ - Name: break_me_variable
+ Type: 'double'
diff --git a/clang/test/APINotes/Inputs/Headers/BrokenTypes.h b/clang/test/APINotes/Inputs/Headers/BrokenTypes.h
new file mode 100644
index 000000000000..fee054b74cf7
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/BrokenTypes.h
@@ -0,0 +1,8 @@
+#ifndef BROKEN_TYPES_H
+#define BROKEN_TYPES_H
+
+char break_me_function(void *ptr);
+
+extern char break_me_variable;
+
+#endif // BROKEN_TYPES_H
diff --git a/clang/test/APINotes/Inputs/Headers/ExternCtx.apinotes b/clang/test/APINotes/Inputs/Headers/ExternCtx.apinotes
new file mode 100644
index 000000000000..0f47ac6deea8
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/ExternCtx.apinotes
@@ -0,0 +1,15 @@
+Name: ExternCtx
+Globals:
+ - Name: globalInExternC
+ Availability: none
+ AvailabilityMsg: "oh no"
+ - Name: globalInExternCXX
+ Availability: none
+ AvailabilityMsg: "oh no #2"
+Functions:
+ - Name: globalFuncInExternC
+ Availability: none
+ AvailabilityMsg: "oh no #3"
+ - Name: globalFuncInExternCXX
+ Availability: none
+ AvailabilityMsg: "oh no #4"
diff --git a/clang/test/APINotes/Inputs/Headers/ExternCtx.h b/clang/test/APINotes/Inputs/Headers/ExternCtx.h
new file mode 100644
index 000000000000..669d443f60ec
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/ExternCtx.h
@@ -0,0 +1,11 @@
+extern "C" {
+ static int globalInExternC = 1;
+
+ static void globalFuncInExternC() {}
+}
+
+extern "C++" {
+ static int globalInExternCXX = 2;
+
+ static void globalFuncInExternCXX() {}
+}
diff --git a/clang/test/APINotes/Inputs/Headers/HeaderLib.apinotes b/clang/test/APINotes/Inputs/Headers/HeaderLib.apinotes
new file mode 100644
index 000000000000..7dcb22476a1d
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/HeaderLib.apinotes
@@ -0,0 +1,37 @@
+Name: HeaderLib
+SwiftInferImportAsMember: true
+Functions:
+ - Name: custom_realloc
+ NullabilityOfRet: N
+ Nullability: [ N, S ]
+ - Name: unavailable_function
+ Availability: none
+ AvailabilityMsg: "I beg you not to use this"
+ - Name: do_something_with_pointers
+ NullabilityOfRet: O
+ Nullability: [ N, O ]
+ - Name: do_something_with_arrays
+ Parameters:
+ - Position: 0
+ Nullability: N
+ - Position: 1
+ Nullability: N
+ - Name: take_pointer_and_int
+ Parameters:
+ - Position: 0
+ Nullability: N
+ NoEscape: true
+ - Position: 1
+ NoEscape: true
+Globals:
+ - Name: global_int
+ Nullability: N
+ - Name: unavailable_global_int
+ Availability: none
+Tags:
+ - Name: unavailable_struct
+ Availability: none
+
+Typedefs:
+ - Name: unavailable_typedef
+ Availability: none
diff --git a/clang/test/APINotes/Inputs/Headers/HeaderLib.h b/clang/test/APINotes/Inputs/Headers/HeaderLib.h
new file mode 100644
index 000000000000..806524960785
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/HeaderLib.h
@@ -0,0 +1,19 @@
+#ifndef HEADER_LIB_H
+#define HEADER_LIB_H
+
+void *custom_realloc(void *member, unsigned size);
+
+int *global_int;
+
+int unavailable_function(void);
+int unavailable_global_int;
+
+void do_something_with_pointers(int *ptr1, int *ptr2);
+void do_something_with_arrays(int simple[], int nested[][2]);
+
+typedef int unavailable_typedef;
+struct unavailable_struct { int x, y, z; };
+
+void take_pointer_and_int(int *ptr1, int value);
+
+#endif
diff --git a/clang/test/APINotes/Inputs/Headers/InstancetypeModule.apinotes b/clang/test/APINotes/Inputs/Headers/InstancetypeModule.apinotes
new file mode 100644
index 000000000000..813eb506f39a
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/InstancetypeModule.apinotes
@@ -0,0 +1,10 @@
+Name: InstancetypeModule
+Classes:
+- Name: SomeBaseClass
+ Methods:
+ - Selector: instancetypeFactoryMethod
+ MethodKind: Class
+ ResultType: SomeBaseClass * _Nonnull
+ - Selector: staticFactoryMethod
+ MethodKind: Class
+ ResultType: SomeBaseClass * _Nonnull
diff --git a/clang/test/APINotes/Inputs/Headers/InstancetypeModule.h b/clang/test/APINotes/Inputs/Headers/InstancetypeModule.h
new file mode 100644
index 000000000000..767f201d9faf
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/InstancetypeModule.h
@@ -0,0 +1,10 @@
+@interface Object
+@end
+
+@interface SomeBaseClass : Object
++ (nullable instancetype)instancetypeFactoryMethod;
++ (nullable SomeBaseClass *)staticFactoryMethod;
+@end
+
+@interface SomeSubclass : SomeBaseClass
+@end
diff --git a/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase.h b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase.h
new file mode 100644
index 000000000000..867a15cae9a6
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase.h
@@ -0,0 +1 @@
+extern int ModuleWithWrongCase;
diff --git a/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate.h b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate.h
new file mode 100644
index 000000000000..aa014296ca7d
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate.h
@@ -0,0 +1 @@
+extern int ModuleWithWrongCasePrivate;
diff --git a/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate_Private.apinotes b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate_Private.apinotes
new file mode 100644
index 000000000000..dc6dc50bab6e
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCasePrivate_Private.apinotes
@@ -0,0 +1 @@
+Name: ModuleWithWrongCasePrivate
diff --git a/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase_Private.apinotes b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase_Private.apinotes
new file mode 100644
index 000000000000..dc6dc50bab6e
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/ModuleWithWrongCase_Private.apinotes
@@ -0,0 +1 @@
+Name: ModuleWithWrongCasePrivate
diff --git a/clang/test/APINotes/Inputs/Headers/Namespaces.apinotes b/clang/test/APINotes/Inputs/Headers/Namespaces.apinotes
new file mode 100644
index 000000000000..e9da36787b63
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/Namespaces.apinotes
@@ -0,0 +1,53 @@
+---
+Name: Namespaces
+Globals:
+ - Name: varInInlineNamespace
+ SwiftName: swiftVarInInlineNamespace
+Functions:
+ - Name: funcInNamespace
+ SwiftName: inWrongContext()
+ - Name: funcInInlineNamespace
+ SwiftName: swiftFuncInInlineNamespace()
+Tags:
+ - Name: char_box
+ SwiftName: InWrongContext
+Namespaces:
+ - Name: Namespace1
+ Typedefs:
+ - Name: my_typedef
+ SwiftName: SwiftTypedef
+ - Name: my_using_decl
+ SwiftName: SwiftUsingDecl
+ Globals:
+ - Name: varInNamespace
+ SwiftName: swiftVarInNamespace
+ Functions:
+ - Name: funcInNamespace
+ SwiftName: swiftFuncInNamespace()
+ Tags:
+ - Name: char_box
+ SwiftName: CharBox
+ Namespaces:
+ - Name: Nested1
+ Globals:
+ - Name: varInNestedNamespace
+ SwiftName: swiftVarInNestedNamespace
+ Functions:
+ - Name: funcInNestedNamespace
+ SwiftName: swiftFuncInNestedNamespace(_:)
+ Tags:
+ - Name: char_box
+ SwiftName: NestedCharBox
+ Namespaces:
+ - Name: Namespace1
+ Tags:
+ - Name: char_box
+ SwiftName: DeepNestedCharBox
+ - Name: Nested2
+ Globals:
+ - Name: varInNestedNamespace
+ SwiftName: swiftAnotherVarInNestedNamespace
+ - Name: InlineNamespace1
+ Functions:
+ - Name: funcInInlineNamespace
+ SwiftName: shouldNotSpellOutInlineNamespaces()
diff --git a/clang/test/APINotes/Inputs/Headers/Namespaces.h b/clang/test/APINotes/Inputs/Headers/Namespaces.h
new file mode 100644
index 000000000000..6a79e996be86
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/Namespaces.h
@@ -0,0 +1,39 @@
+namespace Namespace1 { namespace Nested1 {} }
+
+namespace Namespace1 {
+static int varInNamespace = 1;
+struct char_box { char c; };
+void funcInNamespace();
+
+namespace Nested1 {
+void funcInNestedNamespace(int i);
+struct char_box {
+ char c;
+};
+}
+
+namespace Nested1 {
+static int varInNestedNamespace = 1;
+void funcInNestedNamespace(int i);
+
+namespace Namespace1 {
+struct char_box { char c; };
+} // namespace Namespace1
+} // namespace Nested1
+
+namespace Nested2 {
+static int varInNestedNamespace = 2;
+} // namespace Nested2
+
+namespace Nested1 { namespace Namespace1 {} }
+} // namespace Namespace1
+
+namespace Namespace1 {
+typedef int my_typedef;
+using my_using_decl = int;
+}
+
+inline namespace InlineNamespace1 {
+static int varInInlineNamespace = 3;
+void funcInInlineNamespace();
+}
diff --git a/clang/test/APINotes/Inputs/Headers/PrivateLib.apinotes b/clang/test/APINotes/Inputs/Headers/PrivateLib.apinotes
new file mode 100644
index 000000000000..5f62284aadca
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/PrivateLib.apinotes
@@ -0,0 +1,4 @@
+Name: HeaderLib
+Globals:
+- Name: PrivateLib
+ Type: float
diff --git a/clang/test/APINotes/Inputs/Headers/PrivateLib.h b/clang/test/APINotes/Inputs/Headers/PrivateLib.h
new file mode 100644
index 000000000000..59aeef09bdd3
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/PrivateLib.h
@@ -0,0 +1 @@
+extern int PrivateLib;
diff --git a/clang/test/APINotes/Inputs/Headers/PrivateLib_private.apinotes b/clang/test/APINotes/Inputs/Headers/PrivateLib_private.apinotes
new file mode 100644
index 000000000000..908dae0e3b0b
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/PrivateLib_private.apinotes
@@ -0,0 +1 @@
+garbage here because this file shouldn't get read
diff --git a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes
new file mode 100644
index 000000000000..5dbb83cab86b
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes
@@ -0,0 +1,9 @@
+---
+Name: SwiftImportAs
+Tags:
+- Name: ImmortalRefType
+ SwiftImportAs: reference
+- Name: RefCountedType
+ SwiftImportAs: reference
+ SwiftReleaseOp: RCRelease
+ SwiftRetainOp: RCRetain
diff --git a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h
new file mode 100644
index 000000000000..82b8a6749c4f
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h
@@ -0,0 +1,6 @@
+struct ImmortalRefType {};
+
+struct RefCountedType { int value; };
+
+inline void RCRetain(RefCountedType *x) { x->value++; }
+inline void RCRelease(RefCountedType *x) { x->value--; }
diff --git a/clang/test/APINotes/Inputs/Headers/module.modulemap b/clang/test/APINotes/Inputs/Headers/module.modulemap
new file mode 100644
index 000000000000..98b4ee3e96cf
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/module.modulemap
@@ -0,0 +1,31 @@
+module ExternCtx {
+ header "ExternCtx.h"
+}
+
+module HeaderLib {
+ header "HeaderLib.h"
+}
+
+module InstancetypeModule {
+ header "InstancetypeModule.h"
+}
+
+module BrokenTypes {
+ header "BrokenTypes.h"
+}
+
+module ModuleWithWrongCase {
+ header "ModuleWithWrongCase.h"
+}
+
+module ModuleWithWrongCasePrivate {
+ header "ModuleWithWrongCasePrivate.h"
+}
+
+module Namespaces {
+ header "Namespaces.h"
+}
+
+module SwiftImportAs {
+ header "SwiftImportAs.h"
+}
diff --git a/clang/test/APINotes/Inputs/Headers/module.private.modulemap b/clang/test/APINotes/Inputs/Headers/module.private.modulemap
new file mode 100644
index 000000000000..2ecf322ed18d
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/module.private.modulemap
@@ -0,0 +1,5 @@
+module PrivateLib {
+ header "PrivateLib.h"
+}
+
+module ModuleWithWrongCasePrivate.Inner {}
diff --git a/clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.apinotes b/clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.apinotes
new file mode 100644
index 000000000000..77db84400899
--- /dev/null
+++ b/clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.apinotes
@@ -0,0 +1,65 @@
+---
+Name: UIKit
+Classes:
+ - Name: UIFont
+ Methods:
+ - Selector: 'fontWithName:size:'
+ MethodKind: Instance
+ Nullability: [ N ]
+ NullabilityOfRet: O
+ DesignatedInit: true
+# CHECK: duplicate definition of method '-[UIFont fontWithName:size:]'
+ - Selector: 'fontWithName:size:'
+ MethodKind: Instance
+ Nullability: [ N ]
+ NullabilityOfRet: O
+ DesignatedInit: true
+ Properties:
+ - Name: familyName
+ Nullability: N
+ - Name: fontName
+ Nullability: N
+# CHECK: duplicate definition of instance property 'UIFont.familyName'
+ - Name: familyName
+ Nullability: N
+# CHECK: multiple definitions of class 'UIFont'
+ - Name: UIFont
+Protocols:
+ - Name: MyProto
+ AuditedForNullability: true
+# CHECK: multiple definitions of protocol 'MyProto'
+ - Name: MyProto
+ AuditedForNullability: true
+Functions:
+ - Name: 'globalFoo'
+ Nullability: [ N, N, O, S ]
+ NullabilityOfRet: O
+ - Name: 'globalFoo2'
+ Nullability: [ N, N, O, S ]
+ NullabilityOfRet: O
+Globals:
+ - Name: globalVar
+ Nullability: O
+ - Name: globalVar2
+ Nullability: O
+Tags:
+# CHECK: cannot mix EnumKind and FlagEnum (for FlagAndEnumKind)
+ - Name: FlagAndEnumKind
+ FlagEnum: true
+ EnumKind: CFOptions
+# CHECK: cannot mix EnumKind and FlagEnum (for FlagAndEnumKind2)
+ - Name: FlagAndEnumKind2
+ EnumKind: CFOptions
+ FlagEnum: false
+# CHECK: cannot mix EnumKind and EnumExtensibility (for ExtensibilityAndEnumKind)
+ - Name: ExtensibilityAndEnumKind
+ EnumExtensibility: open
+ EnumKind: CFOptions
+# CHECK: cannot mix EnumKind and EnumExtensibility (for ExtensibilityAndEnumKind2)
+ - Name: ExtensibilityAndEnumKind2
+ EnumKind: CFOptions
+ EnumExtensibility: closed
+# CHECK: cannot mix EnumKind and EnumExtensibility (for ExtensibilityAndEnumKind3)
+ - Name: ExtensibilityAndEnumKind3
+ EnumKind: none
+ EnumExtensibility: none
diff --git a/clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.h b/clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.h
new file mode 100644
index 000000000000..55313ae260ae
--- /dev/null
+++ b/clang/test/APINotes/Inputs/yaml-reader-errors/UIKit.h
@@ -0,0 +1 @@
+extern int yesOfCourseThisIsWhatUIKitLooksLike;
diff --git a/clang/test/APINotes/Inputs/yaml-reader-errors/module.modulemap b/clang/test/APINotes/Inputs/yaml-reader-errors/module.modulemap
new file mode 100644
index 000000000000..3d683d705cac
--- /dev/null
+++ b/clang/test/APINotes/Inputs/yaml-reader-errors/module.modulemap
@@ -0,0 +1,3 @@
+module UIKit {
+ header "UIKit.h"
+}
diff --git a/clang/test/APINotes/availability.m b/clang/test/APINotes/availability.m
new file mode 100644
index 000000000000..2ddc2a73da80
--- /dev/null
+++ b/clang/test/APINotes/availability.m
@@ -0,0 +1,48 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fmodules -Wno-private-module -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify
+
+#include "HeaderLib.h"
+#import <SomeKit/SomeKit.h>
+#import <SomeKit/SomeKit_Private.h>
+
+int main() {
+ int i;
+ i = unavailable_function(); // expected-error{{'unavailable_function' is unavailable: I beg you not to use this}}
+ // expected-note@HeaderLib.h:8{{'unavailable_function' has been explicitly marked unavailable here}}
+ i = unavailable_global_int; // expected-error{{'unavailable_global_int' is unavailable}}
+ // expected-note@HeaderLib.h:9{{'unavailable_global_int' has been explicitly marked unavailable here}}
+
+ unavailable_typedef t; // expected-error{{'unavailable_typedef' is unavailable}}
+ // expected-note@HeaderLib.h:14{{'unavailable_typedef' has been explicitly marked unavailable here}}
+
+ struct unavailable_struct s; // expected-error{{'unavailable_struct' is unavailable}}
+ // expected-note@HeaderLib.h:15{{'unavailable_struct' has been explicitly marked unavailable here}}
+
+ B *b = 0; // expected-error{{'B' is unavailable: just don't}}
+ // expected-note@SomeKit/SomeKit.h:15{{'B' has been explicitly marked unavailable here}}
+
+ id<InternalProtocol> proto = 0; // expected-error{{'InternalProtocol' is unavailable: not for you}}
+ // expected-note@SomeKit/SomeKit_Private.h:12{{'InternalProtocol' has been explicitly marked unavailable here}}
+
+ A *a = 0;
+ i = a.intValue; // expected-error{{intValue' is unavailable: wouldn't work anyway}}
+ // expected-note@SomeKit/SomeKit.h:12{{'intValue' has been explicitly marked unavailable here}}
+
+ [a transform:a]; // expected-error{{'transform:' is unavailable: anything but this}}
+ // expected-note@SomeKit/SomeKit.h:6{{'transform:' has been explicitly marked unavailable here}}
+
+ [a implicitGetOnlyInstance]; // expected-error{{'implicitGetOnlyInstance' is unavailable: getter gone}}
+ // expected-note@SomeKit/SomeKit.h:53{{'implicitGetOnlyInstance' has been explicitly marked unavailable here}}
+ [A implicitGetOnlyClass]; // expected-error{{'implicitGetOnlyClass' is unavailable: getter gone}}
+ // expected-note@SomeKit/SomeKit.h:54{{'implicitGetOnlyClass' has been explicitly marked unavailable here}}
+ [a implicitGetSetInstance]; // expected-error{{'implicitGetSetInstance' is unavailable: getter gone}}
+ // expected-note@SomeKit/SomeKit.h:56{{'implicitGetSetInstance' has been explicitly marked unavailable here}}
+ [a setImplicitGetSetInstance: a]; // expected-error{{'setImplicitGetSetInstance:' is unavailable: setter gone}}
+ // expected-note@SomeKit/SomeKit.h:56{{'setImplicitGetSetInstance:' has been explicitly marked unavailable here}}
+ [A implicitGetSetClass]; // expected-error{{'implicitGetSetClass' is unavailable: getter gone}}
+ // expected-note@SomeKit/SomeKit.h:57{{'implicitGetSetClass' has been explicitly marked unavailable here}}
+ [A setImplicitGetSetClass: a]; // expected-error{{'setImplicitGetSetClass:' is unavailable: setter gone}}
+ // expected-note@SomeKit/SomeKit.h:57{{'setImplicitGetSetClass:' has been explicitly marked unavailable here}}
+ return 0;
+}
+
diff --git a/clang/test/APINotes/broken_types.m b/clang/test/APINotes/broken_types.m
new file mode 100644
index 000000000000..ee33ff7c4b4b
--- /dev/null
+++ b/clang/test/APINotes/broken_types.m
@@ -0,0 +1,19 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s 2> %t.err
+// RUN: FileCheck %s < %t.err
+
+#include "BrokenTypes.h"
+
+// CHECK: <API Notes>:1:1: error: unknown type name 'not_a_type'
+// CHECK-NEXT: not_a_type
+// CHECK-NEXT: ^
+
+// CHECK: <API Notes>:1:7: error: unparsed tokens following type
+// CHECK-NEXT: int * with extra junk
+// CHECK-NEXT: ^
+
+// CHECK: BrokenTypes.h:4:6: error: API notes replacement type 'int *' has a different size from original type 'char'
+
+// CHECK: BrokenTypes.h:6:13: error: API notes replacement type 'double' has a different size from original type 'char'
+
+// CHECK: 5 errors generated.
diff --git a/clang/test/APINotes/case-for-private-apinotes-file.c b/clang/test/APINotes/case-for-private-apinotes-file.c
new file mode 100644
index 000000000000..6aff3db54918
--- /dev/null
+++ b/clang/test/APINotes/case-for-private-apinotes-file.c
@@ -0,0 +1,22 @@
+// REQUIRES: case-insensitive-filesystem
+
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -fmodules -fapinotes-modules -fimplicit-module-maps -fmodules-cache-path=%t -F %S/Inputs/Frameworks -I %S/Inputs/Headers %s 2>&1 | FileCheck %s
+
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -fmodules -fapinotes-modules -fimplicit-module-maps -fmodules-cache-path=%t -iframework %S/Inputs/Frameworks -isystem %S/Inputs/Headers %s -Werror
+
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -fmodules -fapinotes-modules -fimplicit-module-maps -fmodules-cache-path=%t -iframework %S/Inputs/Frameworks -isystem %S/Inputs/Headers %s -Wnonportable-private-system-apinotes-path 2>&1 | FileCheck %s
+
+#include <ModuleWithWrongCase.h>
+#include <ModuleWithWrongCasePrivate.h>
+#include <FrameworkWithWrongCase/FrameworkWithWrongCase.h>
+#include <FrameworkWithWrongCasePrivate/FrameworkWithWrongCasePrivate.h>
+#include <FrameworkWithActualPrivateModule/FrameworkWithActualPrivateModule_Private.h>
+
+// CHECK-NOT: warning:
+// CHECK: warning: private API notes file for module 'ModuleWithWrongCasePrivate' should be named 'ModuleWithWrongCasePrivate_private.apinotes', not 'ModuleWithWrongCasePrivate_Private.apinotes'
+// CHECK-NOT: warning:
+// CHECK: warning: private API notes file for module 'FrameworkWithWrongCasePrivate' should be named 'FrameworkWithWrongCasePrivate_private.apinotes', not 'FrameworkWithWrongCasePrivate_Private.apinotes'
+// CHECK-NOT: warning:
diff --git a/clang/test/APINotes/extern-context.cpp b/clang/test/APINotes/extern-context.cpp
new file mode 100644
index 000000000000..331dee002361
--- /dev/null
+++ b/clang/test/APINotes/extern-context.cpp
@@ -0,0 +1,23 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -ast-dump -ast-dump-filter globalInExternC -x c++ | FileCheck -check-prefix=CHECK-EXTERN-C %s
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -ast-dump -ast-dump-filter globalInExternCXX -x c++ | FileCheck -check-prefix=CHECK-EXTERN-CXX %s
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -ast-dump -ast-dump-filter globalFuncInExternC -x c++ | FileCheck -check-prefix=CHECK-FUNC-EXTERN-C %s
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -ast-dump -ast-dump-filter globalFuncInExternCXX -x c++ | FileCheck -check-prefix=CHECK-FUNC-EXTERN-CXX %s
+
+#include "ExternCtx.h"
+
+// CHECK-EXTERN-C: Dumping globalInExternC:
+// CHECK-EXTERN-C: VarDecl {{.+}} imported in ExternCtx globalInExternC 'int'
+// CHECK-EXTERN-C: UnavailableAttr {{.+}} <<invalid sloc>> "oh no"
+
+// CHECK-EXTERN-CXX: Dumping globalInExternCXX:
+// CHECK-EXTERN-CXX: VarDecl {{.+}} imported in ExternCtx globalInExternCXX 'int'
+// CHECK-EXTERN-CXX: UnavailableAttr {{.+}} <<invalid sloc>> "oh no #2"
+
+// CHECK-FUNC-EXTERN-C: Dumping globalFuncInExternC:
+// CHECK-FUNC-EXTERN-C: FunctionDecl {{.+}} imported in ExternCtx globalFuncInExternC 'void ()'
+// CHECK-FUNC-EXTERN-C: UnavailableAttr {{.+}} <<invalid sloc>> "oh no #3"
+
+// CHECK-FUNC-EXTERN-CXX: Dumping globalFuncInExternCXX:
+// CHECK-FUNC-EXTERN-CXX: FunctionDecl {{.+}} imported in ExternCtx globalFuncInExternCXX 'void ()'
+// CHECK-FUNC-EXTERN-CXX: UnavailableAttr {{.+}} <<invalid sloc>> "oh no #4"
diff --git a/clang/test/APINotes/instancetype.m b/clang/test/APINotes/instancetype.m
new file mode 100644
index 000000000000..30339e5386f6
--- /dev/null
+++ b/clang/test/APINotes/instancetype.m
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -verify %s
+
+@import InstancetypeModule;
+
+void test() {
+ // The nullability is here to verify that the API notes were applied.
+ int good = [SomeSubclass instancetypeFactoryMethod]; // expected-error {{initializing 'int' with an expression of type 'SomeSubclass * _Nonnull'}}
+ int bad = [SomeSubclass staticFactoryMethod]; // expected-error {{initializing 'int' with an expression of type 'SomeBaseClass * _Nonnull'}}
+}
diff --git a/clang/test/APINotes/module-cache.m b/clang/test/APINotes/module-cache.m
new file mode 100644
index 000000000000..5dcaf1181f9d
--- /dev/null
+++ b/clang/test/APINotes/module-cache.m
@@ -0,0 +1,65 @@
+// RUN: rm -rf %t
+
+// Set up directories
+// RUN: mkdir -p %t/APINotes
+// RUN: cp %S/Inputs/APINotes/SomeOtherKit.apinotes %t/APINotes/SomeOtherKit.apinotes
+// RUN: mkdir -p %t/Frameworks
+// RUN: cp -r %S/Inputs/Frameworks/SomeOtherKit.framework %t/Frameworks
+
+// First build: check that 'methodB' is unavailable but 'methodA' is available.
+// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -Rmodule-build -fmodules-cache-path=%t/ModulesCache -iapinotes-modules %t/APINotes -F %t/Frameworks %s > %t/before.log 2>&1
+// RUN: FileCheck -check-prefix=CHECK-METHODB %s < %t/before.log
+// RUN: FileCheck -check-prefix=CHECK-REBUILD %s < %t/before.log
+// RUN: FileCheck -check-prefix=CHECK-ONE-ERROR %s < %t/before.log
+
+// Do it again; this time the cached module should be used, so no rebuild is expected.
+// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -Rmodule-build -fmodules-cache-path=%t/ModulesCache -iapinotes-modules %t/APINotes -F %t/Frameworks %s > %t/before.log 2>&1
+// RUN: FileCheck -check-prefix=CHECK-METHODB %s < %t/before.log
+// RUN: FileCheck -check-prefix=CHECK-WITHOUT-REBUILD %s < %t/before.log
+// RUN: FileCheck -check-prefix=CHECK-ONE-ERROR %s < %t/before.log
+
+// Add a blank line to the header to force the module to rebuild, without
+// (yet) changing API notes.
+// RUN: echo >> %t/Frameworks/SomeOtherKit.framework/Headers/SomeOtherKit.h
+// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -Rmodule-build -fmodules-cache-path=%t/ModulesCache -iapinotes-modules %t/APINotes -F %t/Frameworks %s > %t/before.log 2>&1
+// RUN: FileCheck -check-prefix=CHECK-METHODB %s < %t/before.log
+// RUN: FileCheck -check-prefix=CHECK-REBUILD %s < %t/before.log
+// RUN: FileCheck -check-prefix=CHECK-ONE-ERROR %s < %t/before.log
+
+// Change the API notes file after the module has been rebuilt once.
+// RUN: echo ' - Selector: "methodA"' >> %t/APINotes/SomeOtherKit.apinotes
+// RUN: echo ' MethodKind: Instance' >> %t/APINotes/SomeOtherKit.apinotes
+// RUN: echo ' Availability: none' >> %t/APINotes/SomeOtherKit.apinotes
+// RUN: echo ' AvailabilityMsg: "not here either"' >> %t/APINotes/SomeOtherKit.apinotes
+
+// Build again: check that both methods are now unavailable and that the module rebuilt.
+// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -Rmodule-build -fmodules-cache-path=%t/ModulesCache -iapinotes-modules %t/APINotes -F %t/Frameworks %s > %t/after.log 2>&1
+// RUN: FileCheck -check-prefix=CHECK-METHODA %s < %t/after.log
+// RUN: FileCheck -check-prefix=CHECK-METHODB %s < %t/after.log
+// RUN: FileCheck -check-prefix=CHECK-REBUILD %s < %t/after.log
+// RUN: FileCheck -check-prefix=CHECK-TWO-ERRORS %s < %t/after.log
+
+// Run the build again: check that both methods are still unavailable and that the module was not rebuilt.
+// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -Rmodule-build -fmodules-cache-path=%t/ModulesCache -iapinotes-modules %t/APINotes -F %t/Frameworks %s > %t/after.log 2>&1
+// RUN: FileCheck -check-prefix=CHECK-METHODA %s < %t/after.log
+// RUN: FileCheck -check-prefix=CHECK-METHODB %s < %t/after.log
+// RUN: FileCheck -check-prefix=CHECK-WITHOUT-REBUILD %s < %t/after.log
+// RUN: FileCheck -check-prefix=CHECK-TWO-ERRORS %s < %t/after.log
+
+@import SomeOtherKit;
+
+void test(A *a) {
+ // CHECK-METHODA: error: 'methodA' is unavailable: not here either
+ [a methodA];
+
+ // CHECK-METHODB: error: 'methodB' is unavailable: anything but this
+ [a methodB];
+}
+
+// CHECK-REBUILD: remark: building module{{.*}}SomeOtherKit
+
+// CHECK-WITHOUT-REBUILD-NOT: remark: building module{{.*}}SomeOtherKit
+
+// CHECK-ONE-ERROR: 1 error generated.
+// CHECK-TWO-ERRORS: 2 errors generated.
+
diff --git a/clang/test/APINotes/namespaces.cpp b/clang/test/APINotes/namespaces.cpp
new file mode 100644
index 000000000000..2f9d93c2ea0e
--- /dev/null
+++ b/clang/test/APINotes/namespaces.cpp
@@ -0,0 +1,69 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -x objective-c++
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::my_typedef -x objective-c++ | FileCheck -check-prefix=CHECK-TYPEDEF-IN-NAMESPACE %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::my_using_decl -x objective-c++ | FileCheck -check-prefix=CHECK-USING-DECL-IN-NAMESPACE %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::varInNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-GLOBAL-IN-NAMESPACE %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::funcInNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-FUNC-IN-NAMESPACE %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::char_box -x objective-c++ | FileCheck -check-prefix=CHECK-STRUCT-IN-NAMESPACE %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::Nested1::varInNestedNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-GLOBAL-IN-NESTED-NAMESPACE %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::Nested2::varInNestedNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-ANOTHER-GLOBAL-IN-NESTED-NAMESPACE %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::Nested1::char_box -x objective-c++ | FileCheck -check-prefix=CHECK-STRUCT-IN-NESTED-NAMESPACE %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::Nested1::funcInNestedNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-FUNC-IN-NESTED-NAMESPACE %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter Namespace1::Nested1::Namespace1::char_box -x objective-c++ | FileCheck -check-prefix=CHECK-STRUCT-IN-DEEP-NESTED-NAMESPACE %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter varInInlineNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-GLOBAL-IN-INLINE-NAMESPACE %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/CxxInterop -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter funcInInlineNamespace -x objective-c++ | FileCheck -check-prefix=CHECK-FUNC-IN-INLINE-NAMESPACE %s
+
+#import <Namespaces.h>
+
+// CHECK-TYPEDEF-IN-NAMESPACE: Dumping Namespace1::my_typedef:
+// CHECK-TYPEDEF-IN-NAMESPACE-NEXT: TypedefDecl {{.+}} imported in Namespaces my_typedef 'int'
+// CHECK-TYPEDEF-IN-NAMESPACE: SwiftNameAttr {{.+}} <<invalid sloc>> "SwiftTypedef"
+
+// CHECK-USING-DECL-IN-NAMESPACE: Dumping Namespace1::my_using_decl:
+// CHECK-USING-DECL-IN-NAMESPACE-NEXT: TypeAliasDecl {{.+}} imported in Namespaces my_using_decl 'int'
+// CHECK-USING-DECL-IN-NAMESPACE: SwiftNameAttr {{.+}} <<invalid sloc>> "SwiftUsingDecl"
+
+// CHECK-GLOBAL-IN-NAMESPACE: Dumping Namespace1::varInNamespace:
+// CHECK-GLOBAL-IN-NAMESPACE-NEXT: VarDecl {{.+}} imported in Namespaces varInNamespace 'int' static cinit
+// CHECK-GLOBAL-IN-NAMESPACE-NEXT: IntegerLiteral {{.+}} 'int' 1
+// CHECK-GLOBAL-IN-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftVarInNamespace"
+
+// CHECK-FUNC-IN-NAMESPACE: Dumping Namespace1::funcInNamespace:
+// CHECK-FUNC-IN-NAMESPACE-NEXT: FunctionDecl {{.+}} imported in Namespaces funcInNamespace 'void ()'
+// CHECK-FUNC-IN-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftFuncInNamespace()"
+
+// CHECK-STRUCT-IN-NAMESPACE: Dumping Namespace1::char_box:
+// CHECK-STRUCT-IN-NAMESPACE-NEXT: CXXRecordDecl {{.+}} imported in Namespaces <undeserialized declarations> struct char_box
+// CHECK-STRUCT-IN-NAMESPACE: SwiftNameAttr {{.+}} <<invalid sloc>> "CharBox"
+
+// CHECK-GLOBAL-IN-NESTED-NAMESPACE: Dumping Namespace1::Nested1::varInNestedNamespace:
+// CHECK-GLOBAL-IN-NESTED-NAMESPACE-NEXT: VarDecl {{.+}} imported in Namespaces varInNestedNamespace 'int' static cinit
+// CHECK-GLOBAL-IN-NESTED-NAMESPACE-NEXT: IntegerLiteral {{.+}} 'int' 1
+// CHECK-GLOBAL-IN-NESTED-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftVarInNestedNamespace"
+
+// CHECK-ANOTHER-GLOBAL-IN-NESTED-NAMESPACE: Dumping Namespace1::Nested2::varInNestedNamespace:
+// CHECK-ANOTHER-GLOBAL-IN-NESTED-NAMESPACE-NEXT: VarDecl {{.+}} imported in Namespaces varInNestedNamespace 'int' static cinit
+// CHECK-ANOTHER-GLOBAL-IN-NESTED-NAMESPACE-NEXT: IntegerLiteral {{.+}} 'int' 2
+// CHECK-ANOTHER-GLOBAL-IN-NESTED-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftAnotherVarInNestedNamespace"
+
+// CHECK-FUNC-IN-NESTED-NAMESPACE: Dumping Namespace1::Nested1::funcInNestedNamespace:
+// CHECK-FUNC-IN-NESTED-NAMESPACE-NEXT: FunctionDecl {{.+}} imported in Namespaces funcInNestedNamespace 'void (int)'
+// CHECK-FUNC-IN-NESTED-NAMESPACE-NEXT: ParmVarDecl {{.+}} i 'int'
+// CHECK-FUNC-IN-NESTED-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftFuncInNestedNamespace(_:)"
+
+// CHECK-STRUCT-IN-NESTED-NAMESPACE: Dumping Namespace1::Nested1::char_box:
+// CHECK-STRUCT-IN-NESTED-NAMESPACE-NEXT: CXXRecordDecl {{.+}} imported in Namespaces <undeserialized declarations> struct char_box
+// CHECK-STRUCT-IN-NESTED-NAMESPACE: SwiftNameAttr {{.+}} <<invalid sloc>> "NestedCharBox"
+
+// CHECK-STRUCT-IN-DEEP-NESTED-NAMESPACE: Dumping Namespace1::Nested1::Namespace1::char_box:
+// CHECK-STRUCT-IN-DEEP-NESTED-NAMESPACE-NEXT: CXXRecordDecl {{.+}} imported in Namespaces <undeserialized declarations> struct char_box
+// CHECK-STRUCT-IN-DEEP-NESTED-NAMESPACE: SwiftNameAttr {{.+}} <<invalid sloc>> "DeepNestedCharBox"
+
+// CHECK-GLOBAL-IN-INLINE-NAMESPACE: Dumping varInInlineNamespace:
+// CHECK-GLOBAL-IN-INLINE-NAMESPACE-NEXT: VarDecl {{.+}} imported in Namespaces varInInlineNamespace 'int' static cinit
+// CHECK-GLOBAL-IN-INLINE-NAMESPACE-NEXT: IntegerLiteral {{.+}} 'int' 3
+// CHECK-GLOBAL-IN-INLINE-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftVarInInlineNamespace"
+
+// CHECK-FUNC-IN-INLINE-NAMESPACE: Dumping funcInInlineNamespace:
+// CHECK-FUNC-IN-INLINE-NAMESPACE-NEXT: FunctionDecl {{.+}} imported in Namespaces funcInInlineNamespace 'void ()'
+// CHECK-FUNC-IN-INLINE-NAMESPACE-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "swiftFuncInInlineNamespace()"
diff --git a/clang/test/APINotes/nullability.c b/clang/test/APINotes/nullability.c
new file mode 100644
index 000000000000..e07fc2e5c117
--- /dev/null
+++ b/clang/test/APINotes/nullability.c
@@ -0,0 +1,21 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify
+
+#include "HeaderLib.h"
+
+int main() {
+ custom_realloc(0, 0); // expected-warning{{null passed to a callee that requires a non-null argument}}
+ int i = 0;
+ do_something_with_pointers(&i, 0);
+ do_something_with_pointers(0, &i); // expected-warning{{null passed to a callee that requires a non-null argument}}
+
+ extern void *p;
+ do_something_with_arrays(0, p); // expected-warning{{null passed to a callee that requires a non-null argument}}
+ do_something_with_arrays(p, 0); // expected-warning{{null passed to a callee that requires a non-null argument}}
+
+ take_pointer_and_int(0, 0); // expected-warning{{null passed to a callee that requires a non-null argument}}
+
+ float *fp = global_int; // expected-warning{{incompatible pointer types initializing 'float *' with an expression of type 'int * _Nonnull'}}
+ return 0;
+}
+
diff --git a/clang/test/APINotes/nullability.m b/clang/test/APINotes/nullability.m
new file mode 100644
index 000000000000..21ec6680fa71
--- /dev/null
+++ b/clang/test/APINotes/nullability.m
@@ -0,0 +1,46 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -Wno-private-module -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify
+
+// Test with Swift version 3.0. This should only affect the few APIs that have an entry in the 3.0 tables.
+
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -Wno-private-module -fapinotes-swift-version=3.0 -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify -DSWIFT_VERSION_3_0 -fmodules-ignore-macro=SWIFT_VERSION_3_0
+
+#import <SomeKit/SomeKit.h>
+
+int main() {
+ A *a;
+
+#if SWIFT_VERSION_3_0
+ float *fp = // expected-warning{{incompatible pointer types initializing 'float *' with an expression of type 'A * _Nullable'}}
+ [a transform: 0 integer: 0];
+#else
+ float *fp = // expected-warning{{incompatible pointer types initializing 'float *' with an expression of type 'A *'}}
+ [a transform: 0 integer: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}}
+#endif
+
+ [a setNonnullAInstance: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}}
+ [A setNonnullAInstance: 0]; // no warning
+ a.nonnullAInstance = 0; // expected-warning{{null passed to a callee that requires a non-null argument}}
+ A* _Nonnull aPtr = a.nonnullAInstance; // no warning
+
+ [a setNonnullAClass: 0]; // no warning
+ [A setNonnullAClass: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}}
+
+ [a setNonnullABoth: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}}
+ [A setNonnullABoth: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}}
+
+ [a setInternalProperty: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}}
+
+#if SWIFT_VERSION_3_0
+ // Version 3 information overrides header information.
+ [a setExplicitNonnullInstance: 0]; // okay
+ [a setExplicitNullableInstance: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}}
+#else
+ // Header information overrides unversioned information.
+ [a setExplicitNonnullInstance: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}}
+ [a setExplicitNullableInstance: 0]; // okay
+#endif
+
+ return 0;
+}
+
diff --git a/clang/test/APINotes/objc-forward-declarations.m b/clang/test/APINotes/objc-forward-declarations.m
new file mode 100644
index 000000000000..e82bed205550
--- /dev/null
+++ b/clang/test/APINotes/objc-forward-declarations.m
@@ -0,0 +1,12 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fsyntax-only -F %S/Inputs/Frameworks %s -verify
+
+@import LayeredKit;
+
+void test(
+ UpwardClass *okayClass,
+ id <UpwardProto> okayProto,
+ PerfectlyNormalClass *badClass // expected-error {{'PerfectlyNormalClass' is unavailable}}
+) {
+ // expected-note@LayeredKitImpl/LayeredKitImpl.h:4 {{'PerfectlyNormalClass' has been explicitly marked unavailable here}}
+}
diff --git a/clang/test/APINotes/objc_designated_inits.m b/clang/test/APINotes/objc_designated_inits.m
new file mode 100644
index 000000000000..1f2b8ed534b7
--- /dev/null
+++ b/clang/test/APINotes/objc_designated_inits.m
@@ -0,0 +1,17 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -Wno-private-module -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify
+
+#include "HeaderLib.h"
+#import <SomeKit/SomeKit.h>
+
+@interface CSub : C
+-(instancetype)initWithA:(A*)a;
+@end
+
+@implementation CSub
+-(instancetype)initWithA:(A*)a { // expected-warning{{designated initializer missing a 'super' call to a designated initializer of the super class}}
+ // expected-note@SomeKit/SomeKit.h:20 2{{method marked as designated initializer of the class here}}
+ self = [super init]; // expected-warning{{designated initializer invoked a non-designated initializer}}
+ return self;
+}
+@end
diff --git a/clang/test/APINotes/properties.m b/clang/test/APINotes/properties.m
new file mode 100644
index 000000000000..f218092a66e1
--- /dev/null
+++ b/clang/test/APINotes/properties.m
@@ -0,0 +1,42 @@
+// RUN: rm -rf %t && mkdir -p %t
+
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fblocks -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter 'TestProperties::' | FileCheck -check-prefix=CHECK -check-prefix=CHECK-4 %s
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fblocks -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter 'TestProperties::' -fapinotes-swift-version=3 | FileCheck -check-prefix=CHECK -check-prefix=CHECK-3 %s
+
+@import VersionedKit;
+
+// CHECK-LABEL: ObjCPropertyDecl {{.+}} accessorsOnly 'id'
+// CHECK-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>>
+// CHECK-NOT: Attr
+
+// CHECK-LABEL: ObjCPropertyDecl {{.+}} accessorsOnlyForClass 'id'
+// CHECK-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>>
+// CHECK-NOT: Attr
+
+// CHECK-LABEL: ObjCPropertyDecl {{.+}} accessorsOnlyInVersion3 'id'
+// CHECK-3-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>>
+// CHECK-4-NEXT: SwiftVersionedAdditionAttr {{.+}} 3.0{{$}}
+// CHECK-4-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>>
+// CHECK-NOT: Attr
+
+// CHECK-LABEL: ObjCPropertyDecl {{.+}} accessorsOnlyForClassInVersion3 'id'
+// CHECK-3-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>>
+// CHECK-4-NEXT: SwiftVersionedAdditionAttr {{.+}} 3.0{{$}}
+// CHECK-4-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>>
+// CHECK-NOT: Attr
+
+// CHECK-LABEL: ObjCPropertyDecl {{.+}} accessorsOnlyExceptInVersion3 'id'
+// CHECK-3-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0 IsReplacedByActive{{$}}
+// CHECK-3-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>>
+// CHECK-4-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>>
+// CHECK-4-NEXT: SwiftVersionedRemovalAttr {{.+}} Implicit 3.0 {{[0-9]+}}
+// CHECK-NOT: Attr
+
+// CHECK-LABEL: ObjCPropertyDecl {{.+}} accessorsOnlyForClassExceptInVersion3 'id'
+// CHECK-3-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0 IsReplacedByActive{{$}}
+// CHECK-3-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>>
+// CHECK-4-NEXT: SwiftImportPropertyAsAccessorsAttr {{.+}} <<invalid sloc>>
+// CHECK-4-NEXT: SwiftVersionedRemovalAttr {{.+}} Implicit 3.0 {{[0-9]+}}
+// CHECK-NOT: Attr
+
+// CHECK-LABEL: Decl
diff --git a/clang/test/APINotes/retain-count-convention.m b/clang/test/APINotes/retain-count-convention.m
new file mode 100644
index 000000000000..4bf9610a352a
--- /dev/null
+++ b/clang/test/APINotes/retain-count-convention.m
@@ -0,0 +1,38 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fdisable-module-hash -fsyntax-only -F %S/Inputs/Frameworks %s
+// RUN: %clang_cc1 -ast-print %t/ModulesCache/SimpleKit.pcm | FileCheck %s
+// RUN: %clang_cc1 -ast-dump -ast-dump-filter 'DUMP' %t/ModulesCache/SimpleKit.pcm | FileCheck -check-prefix CHECK-DUMP %s
+
+#import <SimpleKit/SimpleKit.h>
+
+// CHECK: void *getCFOwnedToUnowned(void) __attribute__((cf_returns_not_retained));
+// CHECK: void *getCFUnownedToOwned(void) __attribute__((cf_returns_retained));
+// CHECK: void *getCFOwnedToNone(void) __attribute__((cf_unknown_transfer));
+// CHECK: id getObjCOwnedToUnowned(void) __attribute__((ns_returns_not_retained));
+// CHECK: id getObjCUnownedToOwned(void) __attribute__((ns_returns_retained));
+// CHECK: int indirectGetCFOwnedToUnowned(void * _Nullable *out __attribute__((cf_returns_not_retained)));
+// CHECK: int indirectGetCFUnownedToOwned(void * _Nullable *out __attribute__((cf_returns_retained)));
+// CHECK: int indirectGetCFOwnedToNone(void * _Nullable *out);
+// CHECK: int indirectGetCFNoneToOwned(void **out __attribute__((cf_returns_not_retained)));
+
+// CHECK-LABEL: @interface MethodTest
+// CHECK: - (id)getOwnedToUnowned __attribute__((ns_returns_not_retained));
+// CHECK: - (id)getUnownedToOwned __attribute__((ns_returns_retained));
+// CHECK: @end
+
+// CHECK-DUMP-LABEL: Dumping getCFAuditedToUnowned_DUMP:
+// CHECK-DUMP-NEXT: FunctionDecl
+// CHECK-DUMP-NEXT: CFReturnsNotRetainedAttr
+// CHECK-DUMP-NEXT: CFAuditedTransferAttr
+// CHECK-DUMP-NOT: Attr
+
+// CHECK-DUMP-LABEL: Dumping getCFAuditedToOwned_DUMP:
+// CHECK-DUMP-NEXT: FunctionDecl
+// CHECK-DUMP-NEXT: CFReturnsRetainedAttr
+// CHECK-DUMP-NEXT: CFAuditedTransferAttr
+// CHECK-DUMP-NOT: Attr
+
+// CHECK-DUMP-LABEL: Dumping getCFAuditedToNone_DUMP:
+// CHECK-DUMP-NEXT: FunctionDecl
+// CHECK-DUMP-NEXT: CFUnknownTransferAttr
+// CHECK-DUMP-NOT: Attr
diff --git a/clang/test/APINotes/search-order.m b/clang/test/APINotes/search-order.m
new file mode 100644
index 000000000000..17e81d5eb2d6
--- /dev/null
+++ b/clang/test/APINotes/search-order.m
@@ -0,0 +1,25 @@
+// RUN: rm -rf %t && mkdir -p %t
+
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -DFROM_FRAMEWORK=1 -verify
+
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -iapinotes-modules %S/Inputs/APINotes -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -DFROM_SEARCH_PATH=1 -verify
+
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -iapinotes-modules %S/Inputs/APINotes -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -DFROM_FRAMEWORK=1 -verify
+
+@import SomeOtherKit;
+
+void test(A *a) {
+#if FROM_FRAMEWORK
+ [a methodA]; // expected-error{{unavailable}}
+ [a methodB];
+
+ // expected-note@SomeOtherKit/SomeOtherKit.h:5{{'methodA' has been explicitly marked unavailable here}}
+#elif FROM_SEARCH_PATH
+ [a methodA];
+ [a methodB]; // expected-error{{unavailable}}
+
+ // expected-note@SomeOtherKit/SomeOtherKit.h:6{{'methodB' has been explicitly marked unavailable here}}
+#else
+# error Not something we need to test
+#endif
+}
diff --git a/clang/test/APINotes/swift-import-as.cpp b/clang/test/APINotes/swift-import-as.cpp
new file mode 100644
index 000000000000..904857e58593
--- /dev/null
+++ b/clang/test/APINotes/swift-import-as.cpp
@@ -0,0 +1,16 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -x c++
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter ImmortalRefType | FileCheck -check-prefix=CHECK-IMMORTAL %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter RefCountedType | FileCheck -check-prefix=CHECK-REF-COUNTED %s
+
+#include <SwiftImportAs.h>
+
+// CHECK-IMMORTAL: Dumping ImmortalRefType:
+// CHECK-IMMORTAL-NEXT: CXXRecordDecl {{.+}} imported in SwiftImportAs {{.+}} struct ImmortalRefType
+// CHECK-IMMORTAL: SwiftAttrAttr {{.+}} <<invalid sloc>> "import_reference"
+
+// CHECK-REF-COUNTED: Dumping RefCountedType:
+// CHECK-REF-COUNTED-NEXT: CXXRecordDecl {{.+}} imported in SwiftImportAs {{.+}} struct RefCountedType
+// CHECK-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "import_reference"
+// CHECK-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "retain:RCRetain"
+// CHECK-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "release:RCRelease"
diff --git a/clang/test/APINotes/top-level-private-modules.c b/clang/test/APINotes/top-level-private-modules.c
new file mode 100644
index 000000000000..0da72b2e36f4
--- /dev/null
+++ b/clang/test/APINotes/top-level-private-modules.c
@@ -0,0 +1,8 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -Wno-private-module -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify
+
+#include <PrivateLib.h>
+#include <TopLevelPrivateKit/TopLevelPrivateKit_Private.h>
+
+void *testPlain = PrivateLib; // expected-error {{initializing 'void *' with an expression of incompatible type 'float'}}
+void *testFramework = TopLevelPrivateKit_Private; // expected-error {{initializing 'void *' with an expression of incompatible type 'float'}}
diff --git a/clang/test/APINotes/types.m b/clang/test/APINotes/types.m
new file mode 100644
index 000000000000..133d504713d7
--- /dev/null
+++ b/clang/test/APINotes/types.m
@@ -0,0 +1,28 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -Wno-private-module -fdisable-module-hash -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -verify
+// RUN: %clang_cc1 -ast-print %t/ModulesCache/SimpleKit.pcm | FileCheck %s
+
+#import <SomeKit/SomeKit.h>
+#import <SimpleKit/SimpleKit.h>
+
+// CHECK: struct __attribute__((swift_name("SuccessfullyRenamedA"))) RenamedAgainInAPINotesA {
+// CHECK: struct __attribute__((swift_name("SuccessfullyRenamedB"))) RenamedAgainInAPINotesB {
+
+void test(OverriddenTypes *overridden) {
+ int *ip1 = global_int_ptr; // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'double (*)(int, int)'}}
+
+ int *ip2 = global_int_fun( // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'char *'}}
+ ip2, // expected-warning{{incompatible pointer types passing 'int *' to parameter of type 'double *'}}
+ ip2); // expected-warning{{incompatible pointer types passing 'int *' to parameter of type 'float *'}}
+
+ int *ip3 = [overridden // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'char *'}}
+ methodToMangle: ip3 // expected-warning{{incompatible pointer types sending 'int *' to parameter of type 'double *'}}
+ second: ip3]; // expected-warning{{incompatible pointer types sending 'int *' to parameter of type 'float *'}}
+
+ int *ip4 = overridden.intPropertyToMangle; // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'double *'}}
+}
+
+// expected-note@SomeKit/SomeKit.h:42{{passing argument to parameter 'ptr' here}}
+// expected-note@SomeKit/SomeKit.h:42{{passing argument to parameter 'ptr2' here}}
+// expected-note@SomeKit/SomeKit.h:48{{passing argument to parameter 'ptr1' here}}
+// expected-note@SomeKit/SomeKit.h:48{{passing argument to parameter 'ptr2' here}}
diff --git a/clang/test/APINotes/versioned-multi.c b/clang/test/APINotes/versioned-multi.c
new file mode 100644
index 000000000000..48c51fd932e1
--- /dev/null
+++ b/clang/test/APINotes/versioned-multi.c
@@ -0,0 +1,69 @@
+// RUN: rm -rf %t && mkdir -p %t
+
+// Build and check the unversioned module file.
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Unversioned -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s
+// RUN: %clang_cc1 -ast-print %t/ModulesCache/Unversioned/VersionedKit.pcm | FileCheck -check-prefix=CHECK-UNVERSIONED %s
+
+// Build and check the various versions.
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Versioned3 -fdisable-module-hash -fapinotes-modules -fapinotes-swift-version=3 -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s
+// RUN: %clang_cc1 -ast-print %t/ModulesCache/Versioned3/VersionedKit.pcm | FileCheck -check-prefix=CHECK-VERSIONED-3 %s
+
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Versioned4 -fdisable-module-hash -fapinotes-modules -fapinotes-swift-version=4 -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s
+// RUN: %clang_cc1 -ast-print %t/ModulesCache/Versioned4/VersionedKit.pcm | FileCheck -check-prefix=CHECK-VERSIONED-4 %s
+
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Versioned5 -fdisable-module-hash -fapinotes-modules -fapinotes-swift-version=5 -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s
+// RUN: %clang_cc1 -ast-print %t/ModulesCache/Versioned5/VersionedKit.pcm | FileCheck -check-prefix=CHECK-VERSIONED-5 %s
+
+#import <VersionedKit/VersionedKit.h>
+
+// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef4;
+// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef4Notes __attribute__((swift_name("MultiVersionedTypedef4Notes_NEW")));
+// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef4Header __attribute__((swift_name("MultiVersionedTypedef4Header_NEW")));
+// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef34;
+// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef34Notes __attribute__((swift_name("MultiVersionedTypedef34Notes_NEW")));
+// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef34Header __attribute__((swift_name("MultiVersionedTypedef34Header_NEW")));
+// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef45;
+// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef45Notes __attribute__((swift_name("MultiVersionedTypedef45Notes_NEW")));
+// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef45Header __attribute__((swift_name("MultiVersionedTypedef45Header_NEW")));
+// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef345;
+// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef345Notes __attribute__((swift_name("MultiVersionedTypedef345Notes_NEW")));
+// CHECK-UNVERSIONED: typedef int MultiVersionedTypedef345Header __attribute__((swift_name("MultiVersionedTypedef345Header_NEW")));
+
+// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef4 __attribute__((swift_name("MultiVersionedTypedef4_4")));
+// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef4Notes __attribute__((swift_name("MultiVersionedTypedef4Notes_4")));
+// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef4Header __attribute__((swift_name("MultiVersionedTypedef4Header_4")));
+// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef34 __attribute__((swift_name("MultiVersionedTypedef34_3")));
+// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef34Notes __attribute__((swift_name("MultiVersionedTypedef34Notes_3")));
+// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef34Header __attribute__((swift_name("MultiVersionedTypedef34Header_3")));
+// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef45 __attribute__((swift_name("MultiVersionedTypedef45_4")));
+// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef45Notes __attribute__((swift_name("MultiVersionedTypedef45Notes_4")));
+// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef45Header __attribute__((swift_name("MultiVersionedTypedef45Header_4")));
+// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef345 __attribute__((swift_name("MultiVersionedTypedef345_3")));
+// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef345Notes __attribute__((swift_name("MultiVersionedTypedef345Notes_3")));
+// CHECK-VERSIONED-3: typedef int MultiVersionedTypedef345Header __attribute__((swift_name("MultiVersionedTypedef345Header_3")));
+
+// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef4 __attribute__((swift_name("MultiVersionedTypedef4_4")));
+// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef4Notes __attribute__((swift_name("MultiVersionedTypedef4Notes_4")));
+// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef4Header __attribute__((swift_name("MultiVersionedTypedef4Header_4")));
+// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef34 __attribute__((swift_name("MultiVersionedTypedef34_4")));
+// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef34Notes __attribute__((swift_name("MultiVersionedTypedef34Notes_4")));
+// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef34Header __attribute__((swift_name("MultiVersionedTypedef34Header_4")));
+// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef45 __attribute__((swift_name("MultiVersionedTypedef45_4")));
+// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef45Notes __attribute__((swift_name("MultiVersionedTypedef45Notes_4")));
+// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef45Header __attribute__((swift_name("MultiVersionedTypedef45Header_4")));
+// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef345 __attribute__((swift_name("MultiVersionedTypedef345_4")));
+// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef345Notes __attribute__((swift_name("MultiVersionedTypedef345Notes_4")));
+// CHECK-VERSIONED-4: typedef int MultiVersionedTypedef345Header __attribute__((swift_name("MultiVersionedTypedef345Header_4")));
+
+// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef4;
+// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef4Notes __attribute__((swift_name("MultiVersionedTypedef4Notes_NEW")));
+// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef4Header __attribute__((swift_name("MultiVersionedTypedef4Header_NEW")));
+// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef34;
+// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef34Notes __attribute__((swift_name("MultiVersionedTypedef34Notes_NEW")));
+// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef34Header __attribute__((swift_name("MultiVersionedTypedef34Header_NEW")));
+// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef45 __attribute__((swift_name("MultiVersionedTypedef45_5")));
+// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef45Notes __attribute__((swift_name("MultiVersionedTypedef45Notes_5")));
+// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef45Header __attribute__((swift_name("MultiVersionedTypedef45Header_5")));
+// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef345 __attribute__((swift_name("MultiVersionedTypedef345_5")));
+// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef345Notes __attribute__((swift_name("MultiVersionedTypedef345Notes_5")));
+// CHECK-VERSIONED-5: typedef int MultiVersionedTypedef345Header __attribute__((swift_name("MultiVersionedTypedef345Header_5")));
diff --git a/clang/test/APINotes/versioned.m b/clang/test/APINotes/versioned.m
new file mode 100644
index 000000000000..61cc8c3f7c4d
--- /dev/null
+++ b/clang/test/APINotes/versioned.m
@@ -0,0 +1,187 @@
+// RUN: rm -rf %t && mkdir -p %t
+
+// Build and check the unversioned module file.
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Unversioned -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s
+// RUN: %clang_cc1 -ast-print %t/ModulesCache/Unversioned/VersionedKit.pcm | FileCheck -check-prefix=CHECK-UNVERSIONED %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Unversioned -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter 'DUMP' | FileCheck -check-prefix=CHECK-DUMP -check-prefix=CHECK-UNVERSIONED-DUMP %s
+
+// Build and check the versioned module file.
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Versioned -fdisable-module-hash -fapinotes-modules -fapinotes-swift-version=3 -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s
+// RUN: %clang_cc1 -ast-print %t/ModulesCache/Versioned/VersionedKit.pcm | FileCheck -check-prefix=CHECK-VERSIONED %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Versioned -fdisable-module-hash -fapinotes-modules -fapinotes-swift-version=3 -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter 'DUMP' | FileCheck -check-prefix=CHECK-DUMP -check-prefix=CHECK-VERSIONED-DUMP %s
+
+#import <VersionedKit/VersionedKit.h>
+
+// CHECK-UNVERSIONED: void moveToPointDUMP(double x, double y) __attribute__((swift_name("moveTo(x:y:)")));
+// CHECK-VERSIONED: void moveToPointDUMP(double x, double y) __attribute__((swift_name("moveTo(a:b:)")));
+
+// CHECK-DUMP-LABEL: Dumping moveToPointDUMP
+// CHECK-VERSIONED-DUMP: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0 IsReplacedByActive{{$}}
+// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} "moveTo(x:y:)"
+// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "moveTo(a:b:)"
+// CHECK-UNVERSIONED-DUMP: SwiftNameAttr {{.+}} "moveTo(x:y:)"
+// CHECK-UNVERSIONED-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0{{$}}
+// CHECK-UNVERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "moveTo(a:b:)"
+// CHECK-DUMP-NOT: Attr
+
+// CHECK-DUMP-LABEL: Dumping unversionedRenameDUMP
+// CHECK-DUMP: in VersionedKit unversionedRenameDUMP
+// CHECK-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 0 IsReplacedByActive{{$}}
+// CHECK-DUMP-NEXT: SwiftNameAttr {{.+}} "unversionedRename_HEADER()"
+// CHECK-DUMP-NEXT: SwiftNameAttr {{.+}} "unversionedRename_NOTES()"
+// CHECK-DUMP-NOT: Attr
+
+// CHECK-DUMP-LABEL: Dumping TestGenericDUMP
+// CHECK-VERSIONED-DUMP: SwiftImportAsNonGenericAttr {{.+}} <<invalid sloc>>
+// CHECK-UNVERSIONED-DUMP: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0{{$}}
+// CHECK-UNVERSIONED-DUMP-NEXT: SwiftImportAsNonGenericAttr {{.+}} <<invalid sloc>>
+// CHECK-DUMP-NOT: Attr
+
+// CHECK-DUMP-LABEL: Dumping Swift3RenamedOnlyDUMP
+// CHECK-DUMP: in VersionedKit Swift3RenamedOnlyDUMP
+// CHECK-VERSIONED-DUMP-NEXT: SwiftVersionedRemovalAttr {{.+}} Implicit 3.0 {{[0-9]+}} IsReplacedByActive{{$}}
+// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} "SpecialSwift3Name"
+// CHECK-UNVERSIONED-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0{{$}}
+// CHECK-UNVERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "SpecialSwift3Name"
+// CHECK-DUMP-NOT: Attr
+
+// CHECK-DUMP-LABEL: Dumping Swift3RenamedAlsoDUMP
+// CHECK-DUMP: in VersionedKit Swift3RenamedAlsoDUMP
+// CHECK-VERSIONED-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0 IsReplacedByActive{{$}}
+// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <line:{{.+}}, col:{{.+}}> "Swift4Name"
+// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} "SpecialSwift3Also"
+// CHECK-UNVERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <line:{{.+}}, col:{{.+}}> "Swift4Name"
+// CHECK-UNVERSIONED-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0{{$}}
+// CHECK-UNVERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "SpecialSwift3Also"
+// CHECK-DUMP-NOT: Attr
+
+// CHECK-DUMP-LABEL: Dumping Swift4RenamedDUMP
+// CHECK-DUMP: in VersionedKit Swift4RenamedDUMP
+// CHECK-VERSIONED-DUMP-NEXT: SwiftVersionedRemovalAttr {{.+}} Implicit 4 {{[0-9]+}} IsReplacedByActive{{$}}
+// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} "SpecialSwift4Name"
+// CHECK-UNVERSIONED-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 4{{$}}
+// CHECK-UNVERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <<invalid sloc>> "SpecialSwift4Name"
+// CHECK-DUMP-NOT: Attr
+
+// CHECK-DUMP-NOT: Dumping
+
+// CHECK-UNVERSIONED: void acceptClosure(void (^block)(void) __attribute__((noescape)));
+// CHECK-VERSIONED: void acceptClosure(void (^block)(void));
+
+// CHECK-UNVERSIONED: void privateFunc(void) __attribute__((swift_private));
+
+// CHECK-UNVERSIONED: typedef double MyDoubleWrapper __attribute__((swift_wrapper("struct")));
+
+// CHECK-UNVERSIONED: enum __attribute__((ns_error_domain(MyErrorDomain))) MyErrorCode {
+// CHECK-UNVERSIONED-NEXT: MyErrorCodeFailed = 1
+// CHECK-UNVERSIONED-NEXT: };
+
+// CHECK-UNVERSIONED: __attribute__((swift_bridge("MyValueType")))
+// CHECK-UNVERSIONED: @interface MyReferenceType
+
+// CHECK-VERSIONED: void privateFunc(void);
+
+// CHECK-VERSIONED: typedef double MyDoubleWrapper;
+
+// CHECK-VERSIONED: enum MyErrorCode {
+// CHECK-VERSIONED-NEXT: MyErrorCodeFailed = 1
+// CHECK-VERSIONED-NEXT: };
+
+// CHECK-VERSIONED-NOT: __attribute__((swift_bridge("MyValueType")))
+// CHECK-VERSIONED: @interface MyReferenceType
+
+// CHECK-UNVERSIONED: __attribute__((swift_objc_members)
+// CHECK-UNVERSIONED-NEXT: @interface TestProperties
+// CHECK-VERSIONED-NOT: __attribute__((swift_objc_members)
+// CHECK-VERSIONED: @interface TestProperties
+
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((flag_enum)) FlagEnum {
+// CHECK-UNVERSIONED-NEXT: FlagEnumA = 1,
+// CHECK-UNVERSIONED-NEXT: FlagEnumB = 2
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((flag_enum)) NewlyFlagEnum {
+// CHECK-UNVERSIONED-NEXT: NewlyFlagEnumA = 1,
+// CHECK-UNVERSIONED-NEXT: NewlyFlagEnumB = 2
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((flag_enum)) APINotedFlagEnum {
+// CHECK-UNVERSIONED-NEXT: APINotedFlagEnumA = 1,
+// CHECK-UNVERSIONED-NEXT: APINotedFlagEnumB = 2
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) OpenEnum {
+// CHECK-UNVERSIONED-NEXT: OpenEnumA = 1
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) NewlyOpenEnum {
+// CHECK-UNVERSIONED-NEXT: NewlyOpenEnumA = 1
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) NewlyClosedEnum {
+// CHECK-UNVERSIONED-NEXT: NewlyClosedEnumA = 1
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) ClosedToOpenEnum {
+// CHECK-UNVERSIONED-NEXT: ClosedToOpenEnumA = 1
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) OpenToClosedEnum {
+// CHECK-UNVERSIONED-NEXT: OpenToClosedEnumA = 1
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) APINotedOpenEnum {
+// CHECK-UNVERSIONED-NEXT: APINotedOpenEnumA = 1
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) APINotedClosedEnum {
+// CHECK-UNVERSIONED-NEXT: APINotedClosedEnumA = 1
+// CHECK-UNVERSIONED-NEXT: };
+
+// CHECK-VERSIONED-LABEL: enum __attribute__((flag_enum)) FlagEnum {
+// CHECK-VERSIONED-NEXT: FlagEnumA = 1,
+// CHECK-VERSIONED-NEXT: FlagEnumB = 2
+// CHECK-VERSIONED-NEXT: };
+// CHECK-VERSIONED-LABEL: enum NewlyFlagEnum {
+// CHECK-VERSIONED-NEXT: NewlyFlagEnumA = 1,
+// CHECK-VERSIONED-NEXT: NewlyFlagEnumB = 2
+// CHECK-VERSIONED-NEXT: };
+// CHECK-VERSIONED-LABEL: enum __attribute__((flag_enum)) APINotedFlagEnum {
+// CHECK-VERSIONED-NEXT: APINotedFlagEnumA = 1,
+// CHECK-VERSIONED-NEXT: APINotedFlagEnumB = 2
+// CHECK-VERSIONED-NEXT: };
+// CHECK-VERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) OpenEnum {
+// CHECK-VERSIONED-NEXT: OpenEnumA = 1
+// CHECK-VERSIONED-NEXT: };
+// CHECK-VERSIONED-LABEL: enum NewlyOpenEnum {
+// CHECK-VERSIONED-NEXT: NewlyOpenEnumA = 1
+// CHECK-VERSIONED-NEXT: };
+// CHECK-VERSIONED-LABEL: enum NewlyClosedEnum {
+// CHECK-VERSIONED-NEXT: NewlyClosedEnumA = 1
+// CHECK-VERSIONED-NEXT: };
+// CHECK-VERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) ClosedToOpenEnum {
+// CHECK-VERSIONED-NEXT: ClosedToOpenEnumA = 1
+// CHECK-VERSIONED-NEXT: };
+// CHECK-VERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) OpenToClosedEnum {
+// CHECK-VERSIONED-NEXT: OpenToClosedEnumA = 1
+// CHECK-VERSIONED-NEXT: };
+// CHECK-VERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) APINotedOpenEnum {
+// CHECK-VERSIONED-NEXT: APINotedOpenEnumA = 1
+// CHECK-VERSIONED-NEXT: };
+// CHECK-VERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) APINotedClosedEnum {
+// CHECK-VERSIONED-NEXT: APINotedClosedEnumA = 1
+// CHECK-VERSIONED-NEXT: };
+
+// These don't actually have versioned information, so we just check them once.
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) SoonToBeCFEnum {
+// CHECK-UNVERSIONED-NEXT: SoonToBeCFEnumA = 1
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) SoonToBeNSEnum {
+// CHECK-UNVERSIONED-NEXT: SoonToBeNSEnumA = 1
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) __attribute__((flag_enum)) SoonToBeCFOptions {
+// CHECK-UNVERSIONED-NEXT: SoonToBeCFOptionsA = 1
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("open"))) __attribute__((flag_enum)) SoonToBeNSOptions {
+// CHECK-UNVERSIONED-NEXT: SoonToBeNSOptionsA = 1
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) SoonToBeCFClosedEnum {
+// CHECK-UNVERSIONED-NEXT: SoonToBeCFClosedEnumA = 1
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum __attribute__((enum_extensibility("closed"))) SoonToBeNSClosedEnum {
+// CHECK-UNVERSIONED-NEXT: SoonToBeNSClosedEnumA = 1
+// CHECK-UNVERSIONED-NEXT: };
+// CHECK-UNVERSIONED-LABEL: enum UndoAllThatHasBeenDoneToMe {
+// CHECK-UNVERSIONED-NEXT: UndoAllThatHasBeenDoneToMeA = 1
+// CHECK-UNVERSIONED-NEXT: };
diff --git a/clang/test/APINotes/yaml-convert-diags.c b/clang/test/APINotes/yaml-convert-diags.c
new file mode 100644
index 000000000000..1d352dc2c523
--- /dev/null
+++ b/clang/test/APINotes/yaml-convert-diags.c
@@ -0,0 +1,6 @@
+// RUN: rm -rf %t
+// RUN: not %clang_cc1 -fsyntax-only -fapinotes %s -I %S/Inputs/BrokenHeaders2 2>&1 | FileCheck %s
+
+#include "SomeBrokenLib.h"
+
+// CHECK: error: multiple definitions of global function 'do_something_with_pointers'
diff --git a/clang/test/APINotes/yaml-parse-diags.c b/clang/test/APINotes/yaml-parse-diags.c
new file mode 100644
index 000000000000..3ae39ccb301d
--- /dev/null
+++ b/clang/test/APINotes/yaml-parse-diags.c
@@ -0,0 +1,6 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -fapinotes %s -I %S/Inputs/BrokenHeaders -verify
+
+#include "SomeBrokenLib.h"
+
+// expected-error@APINotes.apinotes:4{{unknown key 'Nu llabilityOfRet'}}
diff --git a/clang/test/APINotes/yaml-reader-errors.m b/clang/test/APINotes/yaml-reader-errors.m
new file mode 100644
index 000000000000..9e5ee34c3e41
--- /dev/null
+++ b/clang/test/APINotes/yaml-reader-errors.m
@@ -0,0 +1,5 @@
+// RUN: rm -rf %t
+// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -fapinotes -fapinotes-modules -fmodules-cache-path=%t -I %S/Inputs/yaml-reader-errors/ -fsyntax-only %s > %t.err 2>&1
+// RUN: FileCheck %S/Inputs/yaml-reader-errors/UIKit.apinotes < %t.err
+
+@import UIKit;
diff --git a/clang/test/CodeGen/CSKY/csky-abi.c b/clang/test/CodeGen/CSKY/csky-abi.c
index 2e549376ba93..29ed661aea75 100644
--- a/clang/test/CodeGen/CSKY/csky-abi.c
+++ b/clang/test/CodeGen/CSKY/csky-abi.c
@@ -185,13 +185,13 @@ void f_va_caller(void) {
// CHECK: [[VA:%.*]] = alloca ptr, align 4
// CHECK: [[V:%.*]] = alloca i32, align 4
// CHECK: store ptr %fmt, ptr [[FMT_ADDR]], align 4
-// CHECK: call void @llvm.va_start(ptr [[VA]])
+// CHECK: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
// CHECK: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
// CHECK: [[TMP1:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
// CHECK: store i32 [[TMP1]], ptr [[V]], align 4
-// CHECK: call void @llvm.va_end(ptr [[VA]])
+// CHECK: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK: [[TMP2:%.*]] = load i32, ptr [[V]], align 4
// CHECK: ret i32 [[TMP2]]
// CHECK: }
@@ -210,13 +210,13 @@ int f_va_1(char *fmt, ...) {
// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4
// CHECK-NEXT: [[V:%.*]] = alloca double, align 4
// CHECK-NEXT: store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR]], align 4
// CHECK-NEXT: store double [[TMP4]], ptr [[V]], align 4
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 4
// CHECK-NEXT: ret double [[TMP5]]
double f_va_2(char *fmt, ...) {
@@ -236,7 +236,7 @@ double f_va_2(char *fmt, ...) {
// CHECK-NEXT: [[W:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[X:%.*]] = alloca double, align 4
// CHECK-NEXT: store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -252,7 +252,7 @@ double f_va_2(char *fmt, ...) {
// CHECK-NEXT: store ptr [[ARGP_NEXT5]], ptr [[VA]], align 4
// CHECK-NEXT: [[TMP11:%.*]] = load double, ptr [[ARGP_CUR4]], align 4
// CHECK-NEXT: store double [[TMP11]], ptr [[X]], align 4
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[V]], align 4
// CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[X]], align 4
// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]]
@@ -279,7 +279,7 @@ double f_va_3(char *fmt, ...) {
// CHECK-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
// CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -302,7 +302,7 @@ double f_va_3(char *fmt, ...) {
// CHECK-NEXT: [[ARGP_NEXT9:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR8]], i32 16
// CHECK-NEXT: store ptr [[ARGP_NEXT9]], ptr [[VA]], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[ARGP_CUR8]], i32 16, i1 false)
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
int f_va_4(char *fmt, ...) {
__builtin_va_list va;
diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d.c b/clang/test/CodeGen/LoongArch/abi-lp64d.c
index 66b480a7f068..fc7f1eada586 100644
--- a/clang/test/CodeGen/LoongArch/abi-lp64d.c
+++ b/clang/test/CodeGen/LoongArch/abi-lp64d.c
@@ -449,13 +449,13 @@ void f_va_caller(void) {
// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 8
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 8
// CHECK-NEXT: store i32 [[TMP0]], ptr [[V]], align 4
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[V]], align 4
// CHECK-NEXT: ret i32 [[TMP1]]
int f_va_int(char *fmt, ...) {
diff --git a/clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c b/clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c
index 03182423a422..b3f1e93b6394 100644
--- a/clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c
+++ b/clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c
@@ -17,7 +17,7 @@ vector double vector_varargs(int count, ...) {
}
// CHECK: %arg_list = alloca ptr
-// CHECK: call void @llvm.va_start(ptr %arg_list)
+// CHECK: call void @llvm.va_start.p0(ptr %arg_list)
// AIX32: for.body:
// AIX32-NEXT: %argp.cur = load ptr, ptr %arg_list, align 4
@@ -41,4 +41,4 @@ vector double vector_varargs(int count, ...) {
// CHECK: for.end:
-// CHECK: call void @llvm.va_end(ptr %arg_list)
+// CHECK: call void @llvm.va_end.p0(ptr %arg_list)
diff --git a/clang/test/CodeGen/PowerPC/aix-vaargs.c b/clang/test/CodeGen/PowerPC/aix-vaargs.c
index 8b8417d315a5..724ba6560cdb 100644
--- a/clang/test/CodeGen/PowerPC/aix-vaargs.c
+++ b/clang/test/CodeGen/PowerPC/aix-vaargs.c
@@ -35,7 +35,7 @@ void testva (int n, ...) {
// CHECK-NEXT: %v = alloca i32, align 4
// CHECK-NEXT: store i32 %n, ptr %n.addr, align 4
-// CHECK-NEXT: call void @llvm.va_start(ptr %ap)
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr %ap)
// AIX32-NEXT: %argp.cur = load ptr, ptr %ap, align 4
// AIX32-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 16
@@ -48,7 +48,7 @@ void testva (int n, ...) {
// AIX32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 %t, ptr align 4 %argp.cur, i32 16, i1 false)
// AIX64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %t, ptr align 8 %argp.cur, i64 16, i1 false)
-// CHECK-NEXT: call void @llvm.va_copy(ptr %ap2, ptr %ap)
+// CHECK-NEXT: call void @llvm.va_copy.p0(ptr %ap2, ptr %ap)
// AIX32-NEXT: %argp.cur1 = load ptr, ptr %ap2, align 4
// AIX32-NEXT: %argp.next2 = getelementptr inbounds i8, ptr %argp.cur1, i32 4
@@ -62,14 +62,14 @@ void testva (int n, ...) {
// AIX64-NEXT: %1 = load i32, ptr %0, align 4
// AIX64-NEXT: store i32 %1, ptr %v, align 4
-// CHECK-NEXT: call void @llvm.va_end(ptr %ap2)
-// CHECK-NEXT: call void @llvm.va_end(ptr %ap)
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr %ap2)
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr %ap)
// CHECK-NEXT: ret void
-// CHECK: declare void @llvm.va_start(ptr)
+// CHECK: declare void @llvm.va_start.p0(ptr)
// AIX32: declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg)
// AIX64: declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
-// CHECK: declare void @llvm.va_copy(ptr, ptr)
-// CHECK: declare void @llvm.va_end(ptr)
+// CHECK: declare void @llvm.va_copy.p0(ptr, ptr)
+// CHECK: declare void @llvm.va_end.p0(ptr)
diff --git a/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c b/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c
index 396614fe5bac..2f5459d1bb9c 100644
--- a/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c
+++ b/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c
@@ -31,7 +31,7 @@ void foo_ls(ldbl128_s);
// OMP-TARGET: call void @foo_ld(ppc_fp128 noundef %[[V3]])
// OMP-HOST-LABEL: define{{.*}} void @omp(
-// OMP-HOST: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
+// OMP-HOST: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]])
// OMP-HOST: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]], align 8
// OMP-HOST: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
// OMP-HOST: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16)
@@ -49,13 +49,13 @@ void omp(int n, ...) {
}
// IEEE-LABEL: define{{.*}} void @f128
-// IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
+// IEEE: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]])
// IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
// IEEE: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
// IEEE: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16)
// IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[ALIGN]], align 16
// IEEE: call void @foo_fq(fp128 noundef %[[V4]])
-// IEEE: call void @llvm.va_end(ptr %[[AP]])
+// IEEE: call void @llvm.va_end.p0(ptr %[[AP]])
void f128(int n, ...) {
va_list ap;
va_start(ap, n);
@@ -64,20 +64,20 @@ void f128(int n, ...) {
}
// IEEE-LABEL: define{{.*}} void @long_double
-// IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
+// IEEE: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]])
// IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
// IEEE: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
// IEEE: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16)
// IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[ALIGN]], align 16
// IEEE: call void @foo_ld(fp128 noundef %[[V4]])
-// IEEE: call void @llvm.va_end(ptr %[[AP]])
+// IEEE: call void @llvm.va_end.p0(ptr %[[AP]])
// IBM-LABEL: define{{.*}} void @long_double
-// IBM: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
+// IBM: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]])
// IBM: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
// IBM: %[[V4:[0-9a-zA-Z_.]+]] = load ppc_fp128, ptr %[[CUR]], align 8
// IBM: call void @foo_ld(ppc_fp128 noundef %[[V4]])
-// IBM: call void @llvm.va_end(ptr %[[AP]])
+// IBM: call void @llvm.va_end.p0(ptr %[[AP]])
void long_double(int n, ...) {
va_list ap;
va_start(ap, n);
@@ -86,7 +86,7 @@ void long_double(int n, ...) {
}
// IEEE-LABEL: define{{.*}} void @long_double_struct
-// IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
+// IEEE: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]])
// IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
// IEEE: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
// IEEE: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16)
@@ -96,7 +96,7 @@ void long_double(int n, ...) {
// IEEE: %[[COERCE:[0-9a-zA-Z_.]+]] = getelementptr inbounds %struct.ldbl128_s, ptr %[[TMP]], i32 0, i32 0
// IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[COERCE]], align 16
// IEEE: call void @foo_ls(fp128 inreg %[[V4]])
-// IEEE: call void @llvm.va_end(ptr %[[AP]])
+// IEEE: call void @llvm.va_end.p0(ptr %[[AP]])
void long_double_struct(int n, ...) {
va_list ap;
va_start(ap, n);
diff --git a/clang/test/CodeGen/RISCV/riscv-func-attr-target-err.c b/clang/test/CodeGen/RISCV/riscv-func-attr-target-err.c
index 35d6973818d0..b303d71304bf 100644
--- a/clang/test/CodeGen/RISCV/riscv-func-attr-target-err.c
+++ b/clang/test/CodeGen/RISCV/riscv-func-attr-target-err.c
@@ -2,6 +2,28 @@
// RUN: not %clang_cc1 -triple riscv64 -target-feature +zifencei -target-feature +m -target-feature +a \
// RUN: -emit-llvm %s 2>&1 | FileCheck %s
+#include <riscv_vector.h>
+
+void test_builtin() {
+// CHECK: error: '__builtin_rvv_vsetvli' needs target feature zve32x
+ __riscv_vsetvl_e8m8(1);
+}
+
+void test_rvv_i32_type() {
+// CHECK: error: RISC-V type 'vint32m1_t' (aka '__rvv_int32m1_t') requires the 'zve32x' extension
+ vint32m1_t v;
+}
+
+void test_rvv_f32_type() {
+// CHECK: error: RISC-V type 'vfloat32m1_t' (aka '__rvv_float32m1_t') requires the 'zve32f' extension
+ vfloat32m1_t v;
+}
+
+void test_rvv_f64_type() {
+// CHECK: error: RISC-V type 'vfloat64m1_t' (aka '__rvv_float64m1_t') requires the 'zve64d' extension
+ vfloat64m1_t v;
+}
+
// CHECK: error: duplicate 'arch=' in the 'target' attribute string;
__attribute__((target("arch=rv64gc;arch=rv64gc_zbb"))) void testMultiArchSelectLast() {}
// CHECK: error: duplicate 'cpu=' in the 'target' attribute string;
diff --git a/clang/test/CodeGen/RISCV/riscv-func-attr-target.c b/clang/test/CodeGen/RISCV/riscv-func-attr-target.c
index f216eaf735b4..1f8682179ea8 100644
--- a/clang/test/CodeGen/RISCV/riscv-func-attr-target.c
+++ b/clang/test/CodeGen/RISCV/riscv-func-attr-target.c
@@ -4,6 +4,8 @@
// RUN: -target-feature -relax -target-feature -zfa \
// RUN: -emit-llvm %s -o - | FileCheck %s
+#include <riscv_vector.h>
+
// CHECK-LABEL: define dso_local void @testDefault
// CHECK-SAME: () #0 {
void testDefault() {}
@@ -35,6 +37,34 @@ testAttrFullArchAndAttrCpu() {}
// CHECK-SAME: () #8 {
__attribute__((target("cpu=sifive-u54"))) void testAttrCpuOnly() {}
+__attribute__((target("arch=+zve32x")))
+void test_builtin_w_zve32x() {
+// CHECK-LABEL: test_builtin_w_zve32x
+// CHECK-SAME: #9
+ __riscv_vsetvl_e8m8(1);
+}
+
+__attribute__((target("arch=+zve32x")))
+void test_rvv_i32_type_w_zve32x() {
+// CHECK-LABEL: test_rvv_i32_type_w_zve32x
+// CHECK-SAME: #9
+ vint32m1_t v;
+}
+
+__attribute__((target("arch=+zve32f")))
+void test_rvv_f32_type_w_zve32f() {
+// CHECK-LABEL: test_rvv_f32_type_w_zve32f
+// CHECK-SAME: #11
+ vfloat32m1_t v;
+}
+
+__attribute__((target("arch=+zve64d")))
+void test_rvv_f64_type_w_zve64d() {
+// CHECK-LABEL: test_rvv_f64_type_w_zve64d
+// CHECK-SAME: #12
+ vfloat64m1_t v;
+}
+
//.
// CHECK: attributes #0 = { {{.*}}"target-features"="+64bit,+a,+m,+save-restore,+zifencei,-relax,-zbb,-zfa" }
// CHECK: attributes #1 = { {{.*}}"target-cpu"="rocket-rv64" "target-features"="+64bit,+a,+d,+f,+m,+save-restore,+v,+zicsr,+zifencei,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-relax,-zbb,-zfa" "tune-cpu"="generic-rv64" }
@@ -46,3 +76,6 @@ __attribute__((target("cpu=sifive-u54"))) void testAttrCpuOnly() {}
// CHECK: attributes #6 = { {{.*}}"target-cpu"="sifive-u54" "target-features"="+64bit,+a,+m,+save-restore,+zbb,+zifencei,-relax,-zfa" }
// CHECK: attributes #7 = { {{.*}}"target-cpu"="sifive-u54" "target-features"="+64bit,+m,+save-restore,{{(-[[:alnum:]-]+)(,-[[:alnum:]-]+)*}}" }
// CHECK: attributes #8 = { {{.*}}"target-cpu"="sifive-u54" "target-features"="+64bit,+a,+c,+d,+f,+m,+save-restore,+zicsr,+zifencei,{{(-[[:alnum:]-]+)(,-[[:alnum:]-]+)*}}" }
+// CHECK: attributes #9 = { {{.*}}"target-features"="+64bit,+a,+m,+save-restore,+zicsr,+zifencei,+zve32x,+zvl32b,-relax,-zbb,-zfa" }
+// CHECK: attributes #11 = { {{.*}}"target-features"="+64bit,+a,+f,+m,+save-restore,+zicsr,+zifencei,+zve32f,+zve32x,+zvl32b,-relax,-zbb,-zfa" }
+// CHECK: attributes #12 = { {{.*}}"target-features"="+64bit,+a,+d,+f,+m,+save-restore,+zicsr,+zifencei,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl32b,+zvl64b,-relax,-zbb,-zfa" }
diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
new file mode 100644
index 000000000000..072d8a863d45
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
@@ -0,0 +1,34 @@
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v \
+// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-LLVM %s
+// RUN: %clang_cc1 -std=c23 -triple riscv64 -target-feature +v \
+// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-LLVM %s
+
+#include <riscv_vector.h>
+
+// CHECK-LLVM: call riscv_vector_cc <vscale x 2 x i32> @bar
+vint32m1_t __attribute__((riscv_vector_cc)) bar(vint32m1_t input);
+vint32m1_t test_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) {
+ vint32m1_t val = __riscv_vle32_v_i32m1(base, vl);
+ vint32m1_t ret = bar(input);
+ __riscv_vse32_v_i32m1(base, val, vl);
+ return ret;
+}
+
+// CHECK-LLVM: call riscv_vector_cc <vscale x 2 x i32> @bar
+[[riscv::vector_cc]] vint32m1_t bar(vint32m1_t input);
+vint32m1_t test_vector_cc_attr2(vint32m1_t input, int32_t *base, size_t vl) {
+ vint32m1_t val = __riscv_vle32_v_i32m1(base, vl);
+ vint32m1_t ret = bar(input);
+ __riscv_vse32_v_i32m1(base, val, vl);
+ return ret;
+}
+
+// CHECK-LLVM: call <vscale x 2 x i32> @baz
+vint32m1_t baz(vint32m1_t input);
+vint32m1_t test_no_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) {
+ vint32m1_t val = __riscv_vle32_v_i32m1(base, vl);
+ vint32m1_t ret = baz(input);
+ __riscv_vse32_v_i32m1(base, val, vl);
+ return ret;
+}
diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp
new file mode 100644
index 000000000000..c01aeb21f675
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp
@@ -0,0 +1,32 @@
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -std=c++11 -triple riscv64 -target-feature +v \
+// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-LLVM %s
+
+#include <riscv_vector.h>
+
+// CHECK-LLVM: call riscv_vector_cc <vscale x 2 x i32> @_Z3baru15__rvv_int32m1_t
+vint32m1_t __attribute__((riscv_vector_cc)) bar(vint32m1_t input);
+vint32m1_t test_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) {
+ vint32m1_t val = __riscv_vle32_v_i32m1(base, vl);
+ vint32m1_t ret = bar(input);
+ __riscv_vse32_v_i32m1(base, val, vl);
+ return ret;
+}
+
+// CHECK-LLVM: call riscv_vector_cc <vscale x 2 x i32> @_Z3baru15__rvv_int32m1_t
+[[riscv::vector_cc]] vint32m1_t bar(vint32m1_t input);
+vint32m1_t test_vector_cc_attr2(vint32m1_t input, int32_t *base, size_t vl) {
+ vint32m1_t val = __riscv_vle32_v_i32m1(base, vl);
+ vint32m1_t ret = bar(input);
+ __riscv_vse32_v_i32m1(base, val, vl);
+ return ret;
+}
+
+// CHECK-LLVM: call <vscale x 2 x i32> @_Z3bazu15__rvv_int32m1_t
+vint32m1_t baz(vint32m1_t input);
+vint32m1_t test_no_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) {
+ vint32m1_t val = __riscv_vle32_v_i32m1(base, vl);
+ vint32m1_t ret = baz(input);
+ __riscv_vse32_v_i32m1(base, val, vl);
+ return ret;
+}
diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv.c b/clang/test/CodeGen/RISCV/riscv-vector-callingconv.c
new file mode 100644
index 000000000000..5c35901799b4
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 %s -std=c23 -triple riscv64 -target-feature +v -verify
+
+__attribute__((riscv_vector_cc)) int var; // expected-warning {{'riscv_vector_cc' only applies to function types; type here is 'int'}}
+
+__attribute__((riscv_vector_cc)) void func();
+__attribute__((riscv_vector_cc(1))) void func_invalid(); // expected-error {{'riscv_vector_cc' attribute takes no arguments}}
+
+void test_no_attribute(int); // expected-note {{previous declaration is here}}
+void __attribute__((riscv_vector_cc)) test_no_attribute(int x) { } // expected-error {{function declared 'riscv_vector_cc' here was previously declared without calling convention}}
+
+[[riscv::vector_cc]] int var2; // expected-warning {{'vector_cc' only applies to function types; type here is 'int'}}
+
+[[riscv::vector_cc]] void func2();
+[[riscv::vector_cc(1)]] void func_invalid2(); // expected-error {{'vector_cc' attribute takes no arguments}}
+
+void test_no_attribute2(int); // expected-note {{previous declaration is here}}
+[[riscv::vector_cc]] void test_no_attribute2(int x) { } // expected-error {{function declared 'riscv_vector_cc' here was previously declared without calling convention}}
diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv.cpp b/clang/test/CodeGen/RISCV/riscv-vector-callingconv.cpp
new file mode 100644
index 000000000000..264bb7d9ad7c
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv.cpp
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 %s -triple riscv64 -target-feature +v -verify
+
+__attribute__((riscv_vector_cc)) int var; // expected-warning {{'riscv_vector_cc' only applies to function types; type here is 'int'}}
+
+__attribute__((riscv_vector_cc)) void func();
+__attribute__((riscv_vector_cc(1))) void func_invalid(); // expected-error {{'riscv_vector_cc' attribute takes no arguments}}
+
+void test_no_attribute(int); // expected-note {{previous declaration is here}}
+void __attribute__((riscv_vector_cc)) test_no_attribute(int x) { } // expected-error {{function declared 'riscv_vector_cc' here was previously declared without calling convention}}
+
+class test_cc {
+ __attribute__((riscv_vector_cc)) void member_func();
+};
+
+void test_lambda() {
+ __attribute__((riscv_vector_cc)) auto lambda = []() { // expected-warning {{'riscv_vector_cc' only applies to function types; type here is 'auto'}}
+ };
+}
+
+[[riscv::vector_cc]] int var2; // expected-warning {{'vector_cc' only applies to function types; type here is 'int'}}
+
+[[riscv::vector_cc]] void func2();
+[[riscv::vector_cc(1)]] void func_invalid2(); // expected-error {{'vector_cc' attribute takes no arguments}}
+
+void test_no_attribute2(int); // expected-note {{previous declaration is here}}
+[[riscv::vector_cc]] void test_no_attribute2(int x) { } // expected-error {{function declared 'riscv_vector_cc' here was previously declared without calling convention}}
+
+class test_cc2 {
+ [[riscv::vector_cc]] void member_func();
+};
+
+void test_lambda2() {
+ [[riscv::vector_cc]] auto lambda = []() { // expected-warning {{'vector_cc' only applies to function types; type here is 'auto'}}
+ };
+}
diff --git a/clang/test/CodeGen/RISCV/riscv32-vararg.c b/clang/test/CodeGen/RISCV/riscv32-vararg.c
index 1c4e41f2f54c..00e04eb89467 100644
--- a/clang/test/CodeGen/RISCV/riscv32-vararg.c
+++ b/clang/test/CodeGen/RISCV/riscv32-vararg.c
@@ -80,13 +80,13 @@ void f_va_caller(void) {
// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4
// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
// CHECK-NEXT: store i32 [[TMP0]], ptr [[V]], align 4
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[V]], align 4
// CHECK-NEXT: ret i32 [[TMP1]]
//
@@ -111,7 +111,7 @@ int f_va_1(char *fmt, ...) {
// CHECK-ILP32F-NEXT: [[VA:%.*]] = alloca ptr, align 4
// CHECK-ILP32F-NEXT: [[V:%.*]] = alloca double, align 8
// CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32F-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-ILP32F-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
// CHECK-ILP32F-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
@@ -119,7 +119,7 @@ int f_va_1(char *fmt, ...) {
// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
// CHECK-ILP32F-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
// CHECK-ILP32F-NEXT: store double [[TMP1]], ptr [[V]], align 8
-// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32F-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-ILP32F-NEXT: [[TMP2:%.*]] = load double, ptr [[V]], align 8
// CHECK-ILP32F-NEXT: ret double [[TMP2]]
//
@@ -130,7 +130,7 @@ int f_va_1(char *fmt, ...) {
// CHECK-ILP32D-NEXT: [[VA:%.*]] = alloca ptr, align 4
// CHECK-ILP32D-NEXT: [[V:%.*]] = alloca double, align 8
// CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32D-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-ILP32D-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
// CHECK-ILP32D-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
@@ -138,7 +138,7 @@ int f_va_1(char *fmt, ...) {
// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
// CHECK-ILP32D-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
// CHECK-ILP32D-NEXT: store double [[TMP1]], ptr [[V]], align 8
-// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32D-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-ILP32D-NEXT: [[TMP2:%.*]] = load double, ptr [[V]], align 8
// CHECK-ILP32D-NEXT: ret double [[TMP2]]
//
@@ -149,13 +149,13 @@ int f_va_1(char *fmt, ...) {
// CHECK-ILP32E-NEXT: [[VA:%.*]] = alloca ptr, align 4
// CHECK-ILP32E-NEXT: [[V:%.*]] = alloca double, align 8
// CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32E-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8
// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
// CHECK-ILP32E-NEXT: [[TMP0:%.*]] = load double, ptr [[ARGP_CUR]], align 4
// CHECK-ILP32E-NEXT: store double [[TMP0]], ptr [[V]], align 8
-// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32E-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-ILP32E-NEXT: [[TMP1:%.*]] = load double, ptr [[V]], align 8
// CHECK-ILP32E-NEXT: ret double [[TMP1]]
//
@@ -180,7 +180,7 @@ double f_va_2(char *fmt, ...) {
// CHECK-ILP32F-NEXT: [[W:%.*]] = alloca i32, align 4
// CHECK-ILP32F-NEXT: [[X:%.*]] = alloca double, align 8
// CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32F-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-ILP32F-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
// CHECK-ILP32F-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
@@ -200,7 +200,7 @@ double f_va_2(char *fmt, ...) {
// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
// CHECK-ILP32F-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8
// CHECK-ILP32F-NEXT: store double [[TMP4]], ptr [[X]], align 8
-// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32F-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-ILP32F-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 8
// CHECK-ILP32F-NEXT: [[TMP6:%.*]] = load double, ptr [[X]], align 8
// CHECK-ILP32F-NEXT: [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]]
@@ -215,7 +215,7 @@ double f_va_2(char *fmt, ...) {
// CHECK-ILP32D-NEXT: [[W:%.*]] = alloca i32, align 4
// CHECK-ILP32D-NEXT: [[X:%.*]] = alloca double, align 8
// CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32D-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-ILP32D-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
// CHECK-ILP32D-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
@@ -235,7 +235,7 @@ double f_va_2(char *fmt, ...) {
// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
// CHECK-ILP32D-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8
// CHECK-ILP32D-NEXT: store double [[TMP4]], ptr [[X]], align 8
-// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32D-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-ILP32D-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 8
// CHECK-ILP32D-NEXT: [[TMP6:%.*]] = load double, ptr [[X]], align 8
// CHECK-ILP32D-NEXT: [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]]
@@ -250,7 +250,7 @@ double f_va_2(char *fmt, ...) {
// CHECK-ILP32E-NEXT: [[W:%.*]] = alloca i32, align 4
// CHECK-ILP32E-NEXT: [[X:%.*]] = alloca double, align 8
// CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32E-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8
// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -266,7 +266,7 @@ double f_va_2(char *fmt, ...) {
// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
// CHECK-ILP32E-NEXT: [[TMP2:%.*]] = load double, ptr [[ARGP_CUR3]], align 4
// CHECK-ILP32E-NEXT: store double [[TMP2]], ptr [[X]], align 8
-// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32E-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-ILP32E-NEXT: [[TMP3:%.*]] = load double, ptr [[V]], align 8
// CHECK-ILP32E-NEXT: [[TMP4:%.*]] = load double, ptr [[X]], align 8
// CHECK-ILP32E-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], [[TMP4]]
@@ -296,7 +296,7 @@ double f_va_3(char *fmt, ...) {
// CHECK-ILP32F-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
// CHECK-ILP32F-NEXT: [[RET:%.*]] = alloca i32, align 4
// CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32F-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-ILP32F-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -321,7 +321,7 @@ double f_va_3(char *fmt, ...) {
// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4
// CHECK-ILP32F-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4
// CHECK-ILP32F-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false)
-// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32F-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-ILP32F-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4
// CHECK-ILP32F-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128
// CHECK-ILP32F-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16
@@ -384,7 +384,7 @@ double f_va_3(char *fmt, ...) {
// CHECK-ILP32D-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
// CHECK-ILP32D-NEXT: [[RET:%.*]] = alloca i32, align 4
// CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32D-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-ILP32D-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -409,7 +409,7 @@ double f_va_3(char *fmt, ...) {
// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4
// CHECK-ILP32D-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4
// CHECK-ILP32D-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false)
-// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32D-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-ILP32D-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4
// CHECK-ILP32D-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128
// CHECK-ILP32D-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16
@@ -472,7 +472,7 @@ double f_va_3(char *fmt, ...) {
// CHECK-ILP32E-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
// CHECK-ILP32E-NEXT: [[RET:%.*]] = alloca i32, align 4
// CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32E-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -497,7 +497,7 @@ double f_va_3(char *fmt, ...) {
// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4
// CHECK-ILP32E-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4
// CHECK-ILP32E-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false)
-// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32E-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-ILP32E-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4
// CHECK-ILP32E-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128
// CHECK-ILP32E-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16
diff --git a/clang/test/CodeGen/RISCV/riscv64-vararg.c b/clang/test/CodeGen/RISCV/riscv64-vararg.c
index 634cde61320c..efdffa2687e6 100644
--- a/clang/test/CodeGen/RISCV/riscv64-vararg.c
+++ b/clang/test/CodeGen/RISCV/riscv64-vararg.c
@@ -135,13 +135,13 @@ void f_va_caller(void) {
// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 8
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 8
// CHECK-NEXT: store i32 [[TMP0]], ptr [[V]], align 4
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[V]], align 4
// CHECK-NEXT: ret i32 [[TMP1]]
//
@@ -166,7 +166,7 @@ int f_va_1(char *fmt, ...) {
// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[V:%.*]] = alloca fp128, align 16
// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 8
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 15
// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[TMP0]], i64 -16)
@@ -174,7 +174,7 @@ int f_va_1(char *fmt, ...) {
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load fp128, ptr [[ARGP_CUR_ALIGNED]], align 16
// CHECK-NEXT: store fp128 [[TMP1]], ptr [[V]], align 16
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr [[V]], align 16
// CHECK-NEXT: ret fp128 [[TMP2]]
//
@@ -199,7 +199,7 @@ long double f_va_2(char *fmt, ...) {
// CHECK-NEXT: [[W:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[X:%.*]] = alloca fp128, align 16
// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 8
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 15
// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[TMP0]], i64 -16)
@@ -219,7 +219,7 @@ long double f_va_2(char *fmt, ...) {
// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = load fp128, ptr [[ARGP_CUR3_ALIGNED]], align 16
// CHECK-NEXT: store fp128 [[TMP4]], ptr [[X]], align 16
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-NEXT: [[TMP5:%.*]] = load fp128, ptr [[V]], align 16
// CHECK-NEXT: [[TMP6:%.*]] = load fp128, ptr [[X]], align 16
// CHECK-NEXT: [[ADD:%.*]] = fadd fp128 [[TMP5]], [[TMP6]]
@@ -248,7 +248,7 @@ long double f_va_3(char *fmt, ...) {
// CHECK-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 8
// CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 8
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 8
@@ -267,7 +267,7 @@ long double f_va_3(char *fmt, ...) {
// CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR5]], align 8
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[LS]], ptr align 8 [[TMP1]], i64 32, i1 false)
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0
// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2
// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP2]] to i64
diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c
index ecf090a128aa..bad68504fab0 100644
--- a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c
+++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c
@@ -1,6 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple riscv32 -target-feature +zbb -verify %s -o -
+// RUN: %clang_cc1 -triple riscv32 -target-feature +zbb -S -verify %s -o -
unsigned int orc_b_64(unsigned int a) {
- return __builtin_riscv_orc_b_64(a); // expected-error {{builtin requires: 'RV64'}}
+ return __builtin_riscv_orc_b_64(a); // expected-error {{'__builtin_riscv_orc_b_64' needs target feature zbb,64bit}}
}
diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkb-error.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkb-error.c
index d2e3e76043ae..a256bf75b031 100644
--- a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkb-error.c
+++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkb-error.c
@@ -1,14 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple riscv64 -target-feature +zbkb -verify %s -o -
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zbkb -S -verify %s -o -
#include <stdint.h>
-uint32_t zip(uint32_t rs1)
+uint32_t zip_unzip(uint32_t rs1)
{
- return __builtin_riscv_zip_32(rs1); // expected-error {{builtin requires: 'RV32'}}
-}
-
-uint32_t unzip(uint32_t rs1)
-{
- return __builtin_riscv_unzip_32(rs1); // expected-error {{builtin requires: 'RV32'}}
+ (void)__builtin_riscv_zip_32(rs1); // expected-error {{'__builtin_riscv_zip_32' needs target feature zbkb,32bit}}
+ return __builtin_riscv_unzip_32(rs1); // expected-error {{'__builtin_riscv_unzip_32' needs target feature zbkb,32bit}}
}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-error.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-error.c
index 6ec9b0579976..ecb6c5f27025 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-error.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-error.c
@@ -11,7 +11,7 @@
// CHECK-RV64V-NEXT: ret i32 [[CONV]]
//
-// CHECK-RV64-ERR: error: builtin requires at least one of the following extensions: 'Zve32x'
+// CHECK-RV64-ERR: error: '__builtin_rvv_vsetvli' needs target feature zve32x
int test() {
return __builtin_rvv_vsetvli(1, 0, 0);
diff --git a/clang/test/CodeGen/WebAssembly/wasm-varargs.c b/clang/test/CodeGen/WebAssembly/wasm-varargs.c
index c475de19ae44..e794857304e1 100644
--- a/clang/test/CodeGen/WebAssembly/wasm-varargs.c
+++ b/clang/test/CodeGen/WebAssembly/wasm-varargs.c
@@ -10,13 +10,13 @@
// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4
// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
// CHECK-NEXT: store i32 [[TMP0]], ptr [[V]], align 4
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[V]], align 4
// CHECK-NEXT: ret i32 [[TMP1]]
//
@@ -38,7 +38,7 @@ int test_i32(char *fmt, ...) {
// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4
// CHECK-NEXT: [[V:%.*]] = alloca i64, align 8
// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
@@ -46,7 +46,7 @@ int test_i32(char *fmt, ...) {
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARGP_CUR_ALIGNED]], align 8
// CHECK-NEXT: store i64 [[TMP1]], ptr [[V]], align 8
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[V]], align 8
// CHECK-NEXT: ret i64 [[TMP2]]
//
@@ -73,13 +73,13 @@ struct S {
// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4
// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4
// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_RESULT]], ptr align 4 [[TMP0]], i32 12, i1 false)
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-NEXT: ret void
//
struct S test_struct(char *fmt, ...) {
@@ -102,7 +102,7 @@ struct Z {};
// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4
// CHECK-NEXT: [[U:%.*]] = alloca [[STRUCT_Z:%.*]], align 1
// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 0
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -112,7 +112,7 @@ struct Z {};
// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_RESULT]], ptr align 4 [[TMP0]], i32 12, i1 false)
-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VA]])
// CHECK-NEXT: ret void
//
struct S test_empty_struct(char *fmt, ...) {
diff --git a/clang/test/CodeGen/X86/va-arg-sse.c b/clang/test/CodeGen/X86/va-arg-sse.c
index e040b0e5790b..b7d00dad1453 100644
--- a/clang/test/CodeGen/X86/va-arg-sse.c
+++ b/clang/test/CodeGen/X86/va-arg-sse.c
@@ -21,7 +21,7 @@ struct S a[5];
// CHECK-NEXT: store i32 0, ptr [[J]], align 4
// CHECK-NEXT: store i32 0, ptr [[K]], align 4
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0
-// CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
// CHECK-NEXT: store ptr getelementptr inbounds ([5 x %struct.S], ptr @a, i64 0, i64 2), ptr [[P]], align 8
// CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0
// CHECK-NEXT: [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY2]], i32 0, i32 1
@@ -52,7 +52,7 @@ struct S a[5];
// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARG]], ptr align 4 [[VAARG_ADDR]], i64 12, i1 false)
// CHECK-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0
-// CHECK-NEXT: call void @llvm.va_end(ptr [[ARRAYDECAY3]])
+// CHECK-NEXT: call void @llvm.va_end.p0(ptr [[ARRAYDECAY3]])
// CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[P]], align 8
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne ptr [[TMP15]], null
// CHECK-NEXT: br i1 [[TOBOOL]], label [[LAND_LHS_TRUE:%.*]], label [[IF_END:%.*]]
diff --git a/clang/test/CodeGen/X86/x86_64-vaarg.c b/clang/test/CodeGen/X86/x86_64-vaarg.c
index a18ba8364238..07c6df14a0b8 100644
--- a/clang/test/CodeGen/X86/x86_64-vaarg.c
+++ b/clang/test/CodeGen/X86/x86_64-vaarg.c
@@ -13,7 +13,7 @@ typedef struct { struct {} a; } empty;
// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_EMPTY]], align 1
// CHECK-NEXT: store i32 [[Z]], ptr [[Z_ADDR]], align 4
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
-// CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RETVAL]], ptr align 1 [[TMP]], i64 0, i1 false)
// CHECK-NEXT: ret void
@@ -37,7 +37,7 @@ typedef struct {
// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
// CHECK-NEXT: store i32 [[Z]], ptr [[Z_ADDR]], align 4
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
-// CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
// CHECK-NEXT: [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1
// CHECK-NEXT: [[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4
diff --git a/clang/test/CodeGen/aarch64-ABI-align-packed.c b/clang/test/CodeGen/aarch64-ABI-align-packed.c
index 2b029f645895..13c68fe54b84 100644
--- a/clang/test/CodeGen/aarch64-ABI-align-packed.c
+++ b/clang/test/CodeGen/aarch64-ABI-align-packed.c
@@ -73,7 +73,7 @@ __attribute__((noinline)) void named_arg_non_packed_struct(double d0, double d1,
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6:[0-9]+]]
-// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[VL]])
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
// CHECK-NEXT: ret void
void variadic_non_packed_struct(double d0, double d1, double d2, double d3,
@@ -128,7 +128,7 @@ __attribute__((noinline)) void named_arg_packed_struct(double d0, double d1, dou
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
-// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[VL]])
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
// CHECK-NEXT: ret void
void variadic_packed_struct(double d0, double d1, double d2, double d3,
@@ -183,7 +183,7 @@ __attribute__((noinline)) void named_arg_packed_member(double d0, double d1, dou
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
-// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[VL]])
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
// CHECK-NEXT: ret void
void variadic_packed_member(double d0, double d1, double d2, double d3,
@@ -238,7 +238,7 @@ __attribute__((noinline)) void named_arg_aligned_struct_8(double d0, double d1,
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
-// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[VL]])
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
// CHECK-NEXT: ret void
void variadic_aligned_struct_8(double d0, double d1, double d2, double d3,
@@ -293,7 +293,7 @@ __attribute__((noinline)) void named_arg_aligned_member_8(double d0, double d1,
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
-// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[VL]])
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
// CHECK-NEXT: ret void
void variadic_aligned_member_8(double d0, double d1, double d2, double d3,
@@ -348,7 +348,7 @@ __attribute__((noinline)) void named_arg_pragma_packed_struct_8(double d0, doubl
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
-// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[VL]])
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
// CHECK-NEXT: ret void
void variadic_pragma_packed_struct_8(double d0, double d1, double d2, double d3,
@@ -403,7 +403,7 @@ __attribute__((noinline)) void named_arg_pragma_packed_struct_4(double d0, doubl
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
-// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[VL]])
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
// CHECK-NEXT: ret void
void variadic_pragma_packed_struct_4(double d0, double d1, double d2, double d3,
diff --git a/clang/test/CodeGen/aarch64-varargs.c b/clang/test/CodeGen/aarch64-varargs.c
index 44b87029e7b3..ee4e88eda4ef 100644
--- a/clang/test/CodeGen/aarch64-varargs.c
+++ b/clang/test/CodeGen/aarch64-varargs.c
@@ -837,7 +837,7 @@ void check_start(int n, ...) {
va_list the_list;
va_start(the_list, n);
// CHECK: [[THE_LIST:%[a-z_0-9]+]] = alloca %struct.__va_list
-// CHECK: call void @llvm.va_start(ptr [[THE_LIST]])
+// CHECK: call void @llvm.va_start.p0(ptr [[THE_LIST]])
}
typedef struct {} empty;
diff --git a/clang/test/CodeGen/arm-varargs.c b/clang/test/CodeGen/arm-varargs.c
index f754c7f52e59..ab4ac46924e6 100644
--- a/clang/test/CodeGen/arm-varargs.c
+++ b/clang/test/CodeGen/arm-varargs.c
@@ -264,5 +264,5 @@ void check_start(int n, ...) {
va_list the_list;
va_start(the_list, n);
// CHECK: [[THE_LIST:%[a-z0-9._]+]] = alloca %struct.__va_list
-// CHECK: call void @llvm.va_start(ptr [[THE_LIST]])
+// CHECK: call void @llvm.va_start.p0(ptr [[THE_LIST]])
}
diff --git a/clang/test/CodeGen/hexagon-linux-vararg.c b/clang/test/CodeGen/hexagon-linux-vararg.c
index 033e72ab449d..84945e872d28 100644
--- a/clang/test/CodeGen/hexagon-linux-vararg.c
+++ b/clang/test/CodeGen/hexagon-linux-vararg.c
@@ -9,7 +9,7 @@ struct AAA {
int d;
};
-// CHECK: call void @llvm.va_start(ptr %arraydecay)
+// CHECK: call void @llvm.va_start.p0(ptr %arraydecay)
// CHECK: %arraydecay1 = getelementptr inbounds [1 x %struct.__va_list_tag],
// ptr %ap, i32 0, i32 0
// CHECK: br label %vaarg.maybe_reg
diff --git a/clang/test/CodeGen/mips-varargs.c b/clang/test/CodeGen/mips-varargs.c
index 052aedd1cd1e..029f000c121a 100644
--- a/clang/test/CodeGen/mips-varargs.c
+++ b/clang/test/CodeGen/mips-varargs.c
@@ -29,7 +29,7 @@ int test_i32(char *fmt, ...) {
// ALL: [[V:%.*]] = alloca i32, align 4
// NEW: [[PROMOTION_TEMP:%.*]] = alloca i32, align 4
//
-// ALL: call void @llvm.va_start(ptr %va)
+// ALL: call void @llvm.va_start.p0(ptr %va)
// ALL: [[AP_CUR:%.+]] = load ptr, ptr %va, align [[$PTRALIGN]]
// O32: [[AP_NEXT:%.+]] = getelementptr inbounds i8, ptr [[AP_CUR]], [[$INTPTR_T:i32]] [[$CHUNKSIZE:4]]
// NEW: [[AP_NEXT:%.+]] = getelementptr inbounds i8, ptr [[AP_CUR]], [[$INTPTR_T:i32|i64]] [[$CHUNKSIZE:8]]
@@ -45,7 +45,7 @@ int test_i32(char *fmt, ...) {
// NEW: [[ARG:%.+]] = load i32, ptr [[PROMOTION_TEMP]], align 4
// ALL: store i32 [[ARG]], ptr [[V]], align 4
//
-// ALL: call void @llvm.va_end(ptr %va)
+// ALL: call void @llvm.va_end.p0(ptr %va)
// ALL: }
long long test_i64(char *fmt, ...) {
@@ -61,7 +61,7 @@ long long test_i64(char *fmt, ...) {
// ALL-LABEL: define{{.*}} i64 @test_i64(ptr{{.*}} %fmt, ...)
//
// ALL: %va = alloca ptr, align [[$PTRALIGN]]
-// ALL: call void @llvm.va_start(ptr %va)
+// ALL: call void @llvm.va_start.p0(ptr %va)
// ALL: [[AP_CUR:%.+]] = load ptr, ptr %va, align [[$PTRALIGN]]
//
// i64 is 8-byte aligned, while this is within O32's stack alignment there's no
@@ -74,7 +74,7 @@ long long test_i64(char *fmt, ...) {
//
// ALL: [[ARG:%.+]] = load i64, ptr [[AP_CUR]], align 8
//
-// ALL: call void @llvm.va_end(ptr %va)
+// ALL: call void @llvm.va_end.p0(ptr %va)
// ALL: }
char *test_ptr(char *fmt, ...) {
@@ -92,7 +92,7 @@ char *test_ptr(char *fmt, ...) {
// ALL: %va = alloca ptr, align [[$PTRALIGN]]
// ALL: [[V:%.*]] = alloca ptr, align [[$PTRALIGN]]
// N32: [[AP_CAST:%.+]] = alloca ptr, align 4
-// ALL: call void @llvm.va_start(ptr %va)
+// ALL: call void @llvm.va_start.p0(ptr %va)
// ALL: [[AP_CUR:%.+]] = load ptr, ptr %va, align [[$PTRALIGN]]
// ALL: [[AP_NEXT:%.+]] = getelementptr inbounds i8, ptr [[AP_CUR]], [[$INTPTR_T]] [[$CHUNKSIZE]]
// ALL: store ptr [[AP_NEXT]], ptr %va, align [[$PTRALIGN]]
@@ -109,7 +109,7 @@ char *test_ptr(char *fmt, ...) {
// N64: [[ARG:%.+]] = load ptr, ptr [[AP_CUR]], align [[$PTRALIGN]]
// ALL: store ptr [[ARG]], ptr [[V]], align [[$PTRALIGN]]
//
-// ALL: call void @llvm.va_end(ptr %va)
+// ALL: call void @llvm.va_end.p0(ptr %va)
// ALL: }
int test_v4i32(char *fmt, ...) {
@@ -128,7 +128,7 @@ int test_v4i32(char *fmt, ...) {
//
// ALL: %va = alloca ptr, align [[$PTRALIGN]]
// ALL: [[V:%.+]] = alloca <4 x i32>, align 16
-// ALL: call void @llvm.va_start(ptr %va)
+// ALL: call void @llvm.va_start.p0(ptr %va)
// ALL: [[AP_CUR:%.+]] = load ptr, ptr %va, align [[$PTRALIGN]]
//
// Vectors are 16-byte aligned, however the O32 ABI has a maximum alignment of
@@ -152,7 +152,7 @@ int test_v4i32(char *fmt, ...) {
// N32: [[ARG:%.+]] = load <4 x i32>, ptr [[AP_CUR]], align 16
// ALL: store <4 x i32> [[ARG]], ptr [[V]], align 16
//
-// ALL: call void @llvm.va_end(ptr %va)
+// ALL: call void @llvm.va_end.p0(ptr %va)
// ALL: [[VECEXT:%.+]] = extractelement <4 x i32> {{.*}}, i32 0
// ALL: ret i32 [[VECEXT]]
// ALL: }
diff --git a/clang/test/CodeGen/pr53127.cpp b/clang/test/CodeGen/pr53127.cpp
index 97fe1291352d..5a52b4860eec 100644
--- a/clang/test/CodeGen/pr53127.cpp
+++ b/clang/test/CodeGen/pr53127.cpp
@@ -34,7 +34,7 @@ void operator delete(void*);
// CHECK-NEXT: br i1 [[CALL6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
// CHECK: cond.true7:
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[L]], i64 0, i64 0
-// CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
// CHECK-NEXT: br label [[COND_END9:%.*]]
// CHECK: cond.false8:
// CHECK-NEXT: br label [[COND_END9]]
@@ -44,7 +44,7 @@ void operator delete(void*);
// CHECK: cond.true11:
// CHECK-NEXT: [[ARRAYDECAY12:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[L]], i64 0, i64 0
// CHECK-NEXT: [[ARRAYDECAY13:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[L2]], i64 0, i64 0
-// CHECK-NEXT: call void @llvm.va_copy(ptr [[ARRAYDECAY12]], ptr [[ARRAYDECAY13]])
+// CHECK-NEXT: call void @llvm.va_copy.p0(ptr [[ARRAYDECAY12]], ptr [[ARRAYDECAY13]])
// CHECK-NEXT: br label [[COND_END15:%.*]]
// CHECK: cond.false14:
// CHECK-NEXT: br label [[COND_END15]]
diff --git a/clang/test/CodeGen/varargs-with-nonzero-default-address-space.c b/clang/test/CodeGen/varargs-with-nonzero-default-address-space.c
new file mode 100644
index 000000000000..b087da34c3df
--- /dev/null
+++ b/clang/test/CodeGen/varargs-with-nonzero-default-address-space.c
@@ -0,0 +1,46 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple spirv64-unknown-unknown -fcuda-is-device -emit-llvm -o - %s | FileCheck %s
+
+struct x {
+ double b;
+ long a;
+};
+
+// CHECK-LABEL: define spir_func void @testva(
+// CHECK-SAME: i32 noundef [[N:%.*]], ...) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[AP:%.*]] = alloca ptr addrspace(4), align 8
+// CHECK-NEXT: [[T:%.*]] = alloca [[STRUCT_X:%.*]], align 8
+// CHECK-NEXT: [[AP2:%.*]] = alloca ptr addrspace(4), align 8
+// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[VARET:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr [[N_ADDR]] to ptr addrspace(4)
+// CHECK-NEXT: [[AP_ASCAST:%.*]] = addrspacecast ptr [[AP]] to ptr addrspace(4)
+// CHECK-NEXT: [[T_ASCAST:%.*]] = addrspacecast ptr [[T]] to ptr addrspace(4)
+// CHECK-NEXT: [[AP2_ASCAST:%.*]] = addrspacecast ptr [[AP2]] to ptr addrspace(4)
+// CHECK-NEXT: [[V_ASCAST:%.*]] = addrspacecast ptr [[V]] to ptr addrspace(4)
+// CHECK-NEXT: [[VARET_ASCAST:%.*]] = addrspacecast ptr [[VARET]] to ptr addrspace(4)
+// CHECK-NEXT: store i32 [[N]], ptr addrspace(4) [[N_ADDR_ASCAST]], align 4
+// CHECK-NEXT: call void @llvm.va_start.p4(ptr addrspace(4) [[AP_ASCAST]])
+// CHECK-NEXT: [[TMP0:%.*]] = va_arg ptr addrspace(4) [[AP_ASCAST]], ptr
+// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 8 [[T_ASCAST]], ptr align 8 [[TMP0]], i64 16, i1 false)
+// CHECK-NEXT: call void @llvm.va_copy.p4(ptr addrspace(4) [[AP2_ASCAST]], ptr addrspace(4) [[AP_ASCAST]])
+// CHECK-NEXT: [[TMP1:%.*]] = va_arg ptr addrspace(4) [[AP2_ASCAST]], i32
+// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[VARET_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[VARET_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[V_ASCAST]], align 4
+// CHECK-NEXT: call void @llvm.va_end.p4(ptr addrspace(4) [[AP2_ASCAST]])
+// CHECK-NEXT: call void @llvm.va_end.p4(ptr addrspace(4) [[AP_ASCAST]])
+// CHECK-NEXT: ret void
+
+void testva(int n, ...) {
+ __builtin_va_list ap;
+ __builtin_va_start(ap, n);
+ struct x t = __builtin_va_arg(ap, struct x);
+ __builtin_va_list ap2;
+ __builtin_va_copy(ap2, ap);
+ int v = __builtin_va_arg(ap2, int);
+ __builtin_va_end(ap2);
+ __builtin_va_end(ap);
+}
diff --git a/clang/test/CodeGen/xcore-abi.c b/clang/test/CodeGen/xcore-abi.c
index 4dd0f221533b..bb8d2fec46bd 100644
--- a/clang/test/CodeGen/xcore-abi.c
+++ b/clang/test/CodeGen/xcore-abi.c
@@ -28,7 +28,7 @@ void testva (int n, ...) {
// CHECK: [[AP:%[a-z0-9]+]] = alloca ptr, align 4
// CHECK: [[V5:%[a-z0-9]+]] = alloca %struct.x, align 4
// CHECK: [[TMP:%[a-z0-9]+]] = alloca [4 x i32], align 4
- // CHECK: call void @llvm.va_start(ptr [[AP]])
+ // CHECK: call void @llvm.va_start.p0(ptr [[AP]])
char* v1 = va_arg (ap, char*);
f(v1);
diff --git a/clang/test/CodeGenCXX/ext-int.cpp b/clang/test/CodeGenCXX/ext-int.cpp
index 5a4270aef285..a1d17c840ee4 100644
--- a/clang/test/CodeGenCXX/ext-int.cpp
+++ b/clang/test/CodeGenCXX/ext-int.cpp
@@ -159,9 +159,9 @@ void TakesVarargs(int i, ...) {
// WIN: %[[ARGS:.+]] = alloca ptr
__builtin_va_start(args, i);
// LIN64: %[[STARTAD:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %[[ARGS]]
- // LIN64: call void @llvm.va_start(ptr %[[STARTAD]])
- // LIN32: call void @llvm.va_start(ptr %[[ARGS]])
- // WIN: call void @llvm.va_start(ptr %[[ARGS]])
+ // LIN64: call void @llvm.va_start.p0(ptr %[[STARTAD]])
+ // LIN32: call void @llvm.va_start.p0(ptr %[[ARGS]])
+ // WIN: call void @llvm.va_start.p0(ptr %[[ARGS]])
_BitInt(92) A = __builtin_va_arg(args, _BitInt(92));
// LIN64: %[[AD1:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %[[ARGS]]
@@ -302,9 +302,9 @@ void TakesVarargs(int i, ...) {
__builtin_va_end(args);
// LIN64: %[[ENDAD:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %[[ARGS]]
- // LIN64: call void @llvm.va_end(ptr %[[ENDAD]])
- // LIN32: call void @llvm.va_end(ptr %[[ARGS]])
- // WIN: call void @llvm.va_end(ptr %[[ARGS]])
+ // LIN64: call void @llvm.va_end.p0(ptr %[[ENDAD]])
+ // LIN32: call void @llvm.va_end.p0(ptr %[[ARGS]])
+ // WIN: call void @llvm.va_end.p0(ptr %[[ARGS]])
}
void typeid_tests() {
// LIN: define{{.*}} void @_Z12typeid_testsv()
diff --git a/clang/test/CodeGenCXX/ibm128-declarations.cpp b/clang/test/CodeGenCXX/ibm128-declarations.cpp
index 5ee4f354d379..e0187e20cde4 100644
--- a/clang/test/CodeGenCXX/ibm128-declarations.cpp
+++ b/clang/test/CodeGenCXX/ibm128-declarations.cpp
@@ -107,13 +107,13 @@ int main(void) {
// CHECK: define dso_local noundef ppc_fp128 @_Z10func_vaargiz(i32 noundef signext %n, ...)
// CHECK: entry:
// CHECK: store i32 %n, ptr %n.addr, align 4
-// CHECK: call void @llvm.va_start(ptr %ap)
+// CHECK: call void @llvm.va_start.p0(ptr %ap)
// CHECK: %argp.cur = load ptr, ptr %ap, align 8
// CHECK: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 16
// CHECK: store ptr %argp.next, ptr %ap, align 8
// CHECK: %0 = load ppc_fp128, ptr %argp.cur, align 8
// CHECK: store ppc_fp128 %0, ptr %r, align 16
-// CHECK: call void @llvm.va_end(ptr %ap)
+// CHECK: call void @llvm.va_end.p0(ptr %ap)
// CHECK: %1 = load ppc_fp128, ptr %r, align 16
// CHECK: ret ppc_fp128 %1
// CHECK: }
diff --git a/clang/test/CodeGenCXX/x86_64-vaarg.cpp b/clang/test/CodeGenCXX/x86_64-vaarg.cpp
index d221c1881d36..985a0cc41a14 100644
--- a/clang/test/CodeGenCXX/x86_64-vaarg.cpp
+++ b/clang/test/CodeGenCXX/x86_64-vaarg.cpp
@@ -11,7 +11,7 @@ typedef struct { struct {} a; } empty;
// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_EMPTY]], align 1
// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
-// CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RETVAL]], ptr align 1 [[TMP]], i64 1, i1 false)
// CHECK-NEXT: ret void
@@ -34,7 +34,7 @@ typedef struct {
// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
-// CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]])
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
// CHECK-NEXT: [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1
// CHECK-NEXT: [[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4
diff --git a/clang/test/Driver/aarch64-sve.c b/clang/test/Driver/aarch64-sve.c
index f34b2700deb9..4a33c2e3c8d3 100644
--- a/clang/test/Driver/aarch64-sve.c
+++ b/clang/test/Driver/aarch64-sve.c
@@ -6,12 +6,11 @@
// RUN: %clang --target=aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-NOSVE %s
// GENERICV8A-NOSVE-NOT: "-target-feature" "+sve"
-// The 32-bit floating point matrix multiply extension is enabled by default
-// for armv8.6-a targets (or later) with SVE, and can optionally be enabled for
-// any target from armv8.2a onwards (we don't enforce not using it with earlier
-// targets).
+// The 32-bit floating point matrix multiply extension is an optional feature
+// that can be used for any target from armv8.2a onwards. It can be
+// enabled using the `+f32mm` option.
// RUN: %clang --target=aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=NO-F32MM %s
-// RUN: %clang --target=aarch64 -march=armv8.6a+sve -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s
+// RUN: %clang --target=aarch64 -march=armv8.6a+sve+f32mm -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s
// RUN: %clang --target=aarch64 -march=armv8.5a+f32mm -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s
// NO-F32MM-NOT: "-target-feature" "+f32mm"
// F32MM: "-target-feature" "+f32mm"
diff --git a/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/AAA.h b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/AAA.h
new file mode 100644
index 000000000000..993d5d4abadb
--- /dev/null
+++ b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/AAA.h
@@ -0,0 +1,3 @@
+#ifndef PUBLIC_UMBRELLA_HEADER_FIRST
+#error "Public umbrella header was not included first!"
+#endif
diff --git a/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/SpecialUmbrella.h b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/SpecialUmbrella.h
new file mode 100644
index 000000000000..2599ff14ae17
--- /dev/null
+++ b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/Headers/SpecialUmbrella.h
@@ -0,0 +1 @@
+#define PUBLIC_UMBRELLA_HEADER_FIRST
diff --git a/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/AAA_Private.h b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/AAA_Private.h
new file mode 100644
index 000000000000..557209bfeb86
--- /dev/null
+++ b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/AAA_Private.h
@@ -0,0 +1,3 @@
+#ifndef PRIVATE_UMBRELLA_HEADER_FIRST
+#error "Private umbrella header was not included first!"
+#endif
diff --git a/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/SpecialPrivateUmbrella.h b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/SpecialPrivateUmbrella.h
new file mode 100644
index 000000000000..fd5b49b94316
--- /dev/null
+++ b/clang/test/InstallAPI/Inputs/Umbrella/Umbrella.framework/PrivateHeaders/SpecialPrivateUmbrella.h
@@ -0,0 +1 @@
+#define PRIVATE_UMBRELLA_HEADER_FIRST
diff --git a/clang/test/InstallAPI/umbrella-headers-unix.test b/clang/test/InstallAPI/umbrella-headers-unix.test
new file mode 100644
index 000000000000..46118779896c
--- /dev/null
+++ b/clang/test/InstallAPI/umbrella-headers-unix.test
@@ -0,0 +1,40 @@
+// UNSUPPORTED: system-windows
+
+; RUN: rm -rf %t
+; RUN: split-file %s %t
+; RUN: sed -e "s|DSTROOT|%/t|g" %t/inputs.json.in > %t/inputs.json
+; RUN: mkdir %t/Frameworks/
+; RUN: cp -r %S/Inputs/Umbrella/Umbrella.framework %t/Frameworks/
+
+// Only validate path-based inputs that rely on regex matching on unix-based file systems.
+; RUN: clang-installapi --target=arm64-apple-macosx13 \
+; RUN: -install_name /System/Library/Frameworks/Umbrella2.framework/Versions/A/Umbrella \
+; RUN: -ObjC -F%t/Frameworks/ %t/inputs.json \
+; RUN: --public-umbrella-header=%t/Frameworks/Umbrella.framework/Headers/SpecialUmbrella.h \
+; RUN: -private-umbrella-header \
+; RUN: %t/Frameworks/Umbrella.framework/PrivateHeaders/SpecialPrivateUmbrella.h \
+; RUN: -o %t/output.tbd 2>&1 | FileCheck -allow-empty %s
+
+; CHECK-NOT: error
+; CHECK-NOT: warning
+
+;--- inputs.json.in
+{
+ "headers": [ {
+ "path" : "DSTROOT/Frameworks/Umbrella.framework/Headers/AAA.h",
+ "type" : "public"
+ },
+ {
+ "path" : "DSTROOT/Frameworks/Umbrella.framework/Headers/SpecialUmbrella.h",
+ "type" : "public"
+ },
+ {
+ "path" : "DSTROOT/Frameworks/Umbrella.framework/PrivateHeaders/AAA_Private.h",
+ "type" : "private"
+ },
+ {
+ "path" : "DSTROOT/Frameworks/Umbrella.framework/PrivateHeaders/SpecialPrivateUmbrella.h",
+ "type" : "private"
+ }],
+ "version": "3"
+}
diff --git a/clang/test/InstallAPI/umbrella-headers.test b/clang/test/InstallAPI/umbrella-headers.test
new file mode 100644
index 000000000000..ce9c50608c41
--- /dev/null
+++ b/clang/test/InstallAPI/umbrella-headers.test
@@ -0,0 +1,48 @@
+; RUN: rm -rf %t
+; RUN: split-file %s %t
+; RUN: sed -e "s|DSTROOT|%/t|g" %t/inputs.json.in > %t/inputs.json
+; RUN: cp -r %S/Inputs/Umbrella/Umbrella.framework %t/Frameworks/
+
+// Check base filename matches.
+; RUN: clang-installapi --target=arm64-apple-macosx13 \
+; RUN: -install_name /System/Library/Frameworks/Umbrella.framework/Versions/A/Umbrella \
+; RUN: -ObjC -F%t/Frameworks/ %t/inputs.json \
+; RUN: --public-umbrella-header=SpecialUmbrella.h \
+; RUN: --private-umbrella-header=SpecialPrivateUmbrella.h \
+; RUN: -o %t/output.tbd 2>&1 | FileCheck -allow-empty %s
+
+// Try missing umbrella header argument.
+; RUN: not clang-installapi --target=arm64-apple-macosx13 \
+; RUN: -install_name /System/Library/Frameworks/Umbrella.framework/Versions/A/Umbrella \
+; RUN: -ObjC -F%t/Frameworks/ %t/inputs.json \
+; RUN: --public-umbrella-header=Ignore.h \
+; RUN: -o %t/output.tbd 2>&1 | FileCheck %s -check-prefix=ERR
+
+; ERR: error: public umbrella header file not found in input: 'Ignore.h'
+
+; CHECK-NOT: error
+; CHECK-NOT: warning
+
+;--- Frameworks/Umbrella.framework/Headers/Ignore.h
+#error "This header should be ignored"
+
+;--- inputs.json.in
+{
+ "headers": [ {
+ "path" : "DSTROOT/Frameworks/Umbrella.framework/Headers/AAA.h",
+ "type" : "public"
+ },
+ {
+ "path" : "DSTROOT/Frameworks/Umbrella.framework/Headers/SpecialUmbrella.h",
+ "type" : "public"
+ },
+ {
+ "path" : "DSTROOT/Frameworks/Umbrella.framework/PrivateHeaders/AAA_Private.h",
+ "type" : "private"
+ },
+ {
+ "path" : "DSTROOT/Frameworks/Umbrella.framework/PrivateHeaders/SpecialPrivateUmbrella.h",
+ "type" : "private"
+ }],
+ "version": "3"
+}
diff --git a/clang/test/Modules/codegen.test b/clang/test/Modules/codegen.test
index 77602056defd..0af630a75480 100644
--- a/clang/test/Modules/codegen.test
+++ b/clang/test/Modules/codegen.test
@@ -26,7 +26,7 @@ USE: $_Z4instIiEvv = comdat any
USE: $_Z10always_inlv = comdat any
FOO: $_ZN13implicit_dtorD2Ev = comdat any
FOO: define weak_odr void @_Z2f1PKcz(ptr noundef %fmt, ...) #{{[0-9]+}} comdat
-FOO: call void @llvm.va_start(ptr %{{[a-zA-Z0-9]*}})
+FOO: call void @llvm.va_start.p0(ptr %{{[a-zA-Z0-9]*}})
Test that implicit special members are emitted into the FOO module if they're
ODR used there, otherwise emit them linkonce_odr as usual in the use.
diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
index 9f8a8bdeeb9c..85762b7fed4d 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -196,7 +196,7 @@
// CHECK-8_6-NOT: __ARM_FEATURE_SHA3 1
// CHECK-8_6-NOT: __ARM_FEATURE_SM4 1
-// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.6-a+sve -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE-8_6 %s
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.6-a+sve+f32mm -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE-8_6 %s
// CHECK-SVE-8_6: __ARM_FEATURE_SVE 1
// CHECK-SVE-8_6: __ARM_FEATURE_SVE_BF16 1
// CHECK-SVE-8_6: __ARM_FEATURE_SVE_MATMUL_FP32 1
diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp
index b7ea0d003a52..787cc809e253 100644
--- a/clang/test/SemaTemplate/concepts.cpp
+++ b/clang/test/SemaTemplate/concepts.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c++20 -verify %s
+// RUN: %clang_cc1 -std=c++20 -ferror-limit 0 -verify %s
namespace PR47043 {
template<typename T> concept True = true;
@@ -1114,3 +1114,11 @@ void foo() {
}
} // namespace GH64808
+
+namespace GH86757_1 {
+template <typename...> concept b = false;
+template <typename> concept c = b<>;
+template <typename d> concept f = c< d >;
+template <f> struct e; // expected-note {{}}
+template <f d> struct e<d>; // expected-error {{class template partial specialization is not more specialized than the primary template}}
+}
diff --git a/clang/tools/clang-installapi/InstallAPIOpts.td b/clang/tools/clang-installapi/InstallAPIOpts.td
index ab9e1fe7f2f9..71532c9cf24d 100644
--- a/clang/tools/clang-installapi/InstallAPIOpts.td
+++ b/clang/tools/clang-installapi/InstallAPIOpts.td
@@ -61,3 +61,15 @@ def exclude_private_header : Separate<["-"], "exclude-private-header">,
HelpText<"Exclude private header from parsing">;
def exclude_private_header_EQ : Joined<["--"], "exclude-private-header=">,
Alias<exclude_private_header>;
+def public_umbrella_header : Separate<["-"], "public-umbrella-header">,
+ MetaVarName<"<path>">, HelpText<"Specify the public umbrella header location">;
+def public_umbrella_header_EQ : Joined<["--"], "public-umbrella-header=">,
+ Alias<public_umbrella_header>;
+def private_umbrella_header : Separate<["-"], "private-umbrella-header">,
+ MetaVarName<"<path>">, HelpText<"Specify the private umbrella header location">;
+def private_umbrella_header_EQ : Joined<["--"], "private-umbrella-header=">,
+ Alias<private_umbrella_header>;
+def project_umbrella_header : Separate<["-"], "project-umbrella-header">,
+ MetaVarName<"<path>">, HelpText<"Specify the project umbrella header location">;
+def project_umbrella_header_EQ : Joined<["--"], "project-umbrella-header=">,
+ Alias<project_umbrella_header>;
diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp
index 4f79c62724a6..8e4a1b019fd8 100644
--- a/clang/tools/clang-installapi/Options.cpp
+++ b/clang/tools/clang-installapi/Options.cpp
@@ -270,6 +270,16 @@ Options::processAndFilterOutInstallAPIOptions(ArrayRef<const char *> Args) {
OPT_exclude_project_header))
return {};
+ // Handle umbrella headers.
+ if (const Arg *A = ParsedArgs.getLastArg(OPT_public_umbrella_header))
+ DriverOpts.PublicUmbrellaHeader = A->getValue();
+
+ if (const Arg *A = ParsedArgs.getLastArg(OPT_private_umbrella_header))
+ DriverOpts.PrivateUmbrellaHeader = A->getValue();
+
+ if (const Arg *A = ParsedArgs.getLastArg(OPT_project_umbrella_header))
+ DriverOpts.ProjectUmbrellaHeader = A->getValue();
+
/// Any unclaimed arguments should be forwarded to the clang driver.
std::vector<const char *> ClangDriverArgs(ParsedArgs.size());
for (const Arg *A : ParsedArgs) {
@@ -323,6 +333,15 @@ Options::Options(DiagnosticsEngine &Diag, FileManager *FM,
}
}
+static const Regex Rule("(.+)/(.+)\\.framework/");
+static StringRef getFrameworkNameFromInstallName(StringRef InstallName) {
+ SmallVector<StringRef, 3> Match;
+ Rule.match(InstallName, &Match);
+ if (Match.empty())
+ return "";
+ return Match.back();
+}
+
InstallAPIContext Options::createContext() {
InstallAPIContext Ctx;
Ctx.FM = FM;
@@ -339,6 +358,11 @@ InstallAPIContext Options::createContext() {
Ctx.OutputLoc = DriverOpts.OutputPath;
Ctx.LangMode = FEOpts.LangMode;
+ // Attempt to find umbrella headers by capturing framework name.
+ StringRef FrameworkName;
+ if (!LinkerOpts.IsDylib)
+ FrameworkName = getFrameworkNameFromInstallName(LinkerOpts.InstallName);
+
// Process inputs.
for (const std::string &ListPath : DriverOpts.FileLists) {
auto Buffer = FM->getBufferForFile(ListPath);
@@ -357,8 +381,7 @@ InstallAPIContext Options::createContext() {
assert(Type != HeaderType::Unknown && "Missing header type.");
for (const StringRef Path : Headers) {
if (!FM->getOptionalFileRef(Path)) {
- Diags->Report(diag::err_no_such_header_file)
- << Path << (unsigned)Type - 1;
+ Diags->Report(diag::err_no_such_header_file) << Path << (unsigned)Type;
return false;
}
SmallString<PATH_MAX> FullPath(Path);
@@ -382,6 +405,7 @@ InstallAPIContext Options::createContext() {
std::vector<std::unique_ptr<HeaderGlob>> ExcludedHeaderGlobs;
std::set<FileEntryRef> ExcludedHeaderFiles;
auto ParseGlobs = [&](const PathSeq &Paths, HeaderType Type) {
+ assert(Type != HeaderType::Unknown && "Missing header type.");
for (const StringRef Path : Paths) {
auto Glob = HeaderGlob::create(Path, Type);
if (Glob)
@@ -424,6 +448,57 @@ InstallAPIContext Options::createContext() {
if (!Glob->didMatch())
Diags->Report(diag::warn_glob_did_not_match) << Glob->str();
+ // Mark any explicit or inferred umbrella headers. If one exists, move
+ // that to the beginning of the input headers.
+ auto MarkandMoveUmbrellaInHeaders = [&](llvm::Regex &Regex,
+ HeaderType Type) -> bool {
+ auto It = find_if(Ctx.InputHeaders, [&Regex, Type](const HeaderFile &H) {
+ return (H.getType() == Type) && Regex.match(H.getPath());
+ });
+
+ if (It == Ctx.InputHeaders.end())
+ return false;
+ It->setUmbrellaHeader();
+
+ // Because there can be an umbrella header per header type,
+  // find the first non-umbrella header to swap position with.
+ auto BeginPos = find_if(Ctx.InputHeaders, [](const HeaderFile &H) {
+ return !H.isUmbrellaHeader();
+ });
+ if (BeginPos != Ctx.InputHeaders.end() && BeginPos < It)
+ std::swap(*BeginPos, *It);
+ return true;
+ };
+
+ auto FindUmbrellaHeader = [&](StringRef HeaderPath, HeaderType Type) -> bool {
+ assert(Type != HeaderType::Unknown && "Missing header type.");
+ if (!HeaderPath.empty()) {
+ auto EscapedString = Regex::escape(HeaderPath);
+ Regex UmbrellaRegex(EscapedString);
+ if (!MarkandMoveUmbrellaInHeaders(UmbrellaRegex, Type)) {
+ Diags->Report(diag::err_no_such_umbrella_header_file)
+ << HeaderPath << (unsigned)Type;
+ return false;
+ }
+ } else if (!FrameworkName.empty() && (Type != HeaderType::Project)) {
+ auto UmbrellaName = "/" + Regex::escape(FrameworkName);
+ if (Type == HeaderType::Public)
+ UmbrellaName += "\\.h";
+ else
+ UmbrellaName += "[_]?Private\\.h";
+ Regex UmbrellaRegex(UmbrellaName);
+ MarkandMoveUmbrellaInHeaders(UmbrellaRegex, Type);
+ }
+ return true;
+ };
+ if (!FindUmbrellaHeader(DriverOpts.PublicUmbrellaHeader,
+ HeaderType::Public) ||
+ !FindUmbrellaHeader(DriverOpts.PrivateUmbrellaHeader,
+ HeaderType::Private) ||
+ !FindUmbrellaHeader(DriverOpts.ProjectUmbrellaHeader,
+ HeaderType::Project))
+ return Ctx;
+
// Parse binary dylib and initialize verifier.
if (DriverOpts.DylibToVerify.empty()) {
Ctx.Verifier = std::make_unique<DylibVerifier>();
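
An editorial aside on the Options.cpp hunks above: the framework-name regex and the
umbrella-header inference appear only in fragments here, so the following standalone
sketch — not part of the patch; the main() harness and the sample install name are
illustrative assumptions — shows what getFrameworkNameFromInstallName is expected to
capture and how that name feeds the inferred umbrella patterns.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  // Same pattern as the static Rule above: capture the component that
  // precedes ".framework/" in the install name.
  llvm::Regex Rule("(.+)/(.+)\\.framework/");
  llvm::SmallVector<llvm::StringRef, 3> Match;
  llvm::StringRef InstallName =
      "/System/Library/Frameworks/Umbrella.framework/Versions/A/Umbrella";
  if (Rule.match(InstallName, &Match))
    // Match.back() is the framework name ("Umbrella"), which then seeds the
    // "<Name>.h" and "<Name>[_]?Private.h" umbrella header patterns.
    llvm::outs() << Match.back() << "\n";
  return 0;
}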
diff --git a/clang/tools/clang-installapi/Options.h b/clang/tools/clang-installapi/Options.h
index c18309f69370..3671e4c8274b 100644
--- a/clang/tools/clang-installapi/Options.h
+++ b/clang/tools/clang-installapi/Options.h
@@ -31,6 +31,15 @@ struct DriverOptions {
/// \brief Path to input file lists (JSON).
llvm::MachO::PathSeq FileLists;
+ /// \brief Path to public umbrella header.
+ std::string PublicUmbrellaHeader;
+
+ /// \brief Path to private umbrella header.
+ std::string PrivateUmbrellaHeader;
+
+ /// \brief Path to project umbrella header.
+ std::string ProjectUmbrellaHeader;
+
/// \brief Paths of extra public headers.
PathSeq ExtraPublicHeaders;
diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp
index 292d524f00ab..991767dc4c49 100644
--- a/clang/tools/libclang/CXType.cpp
+++ b/clang/tools/libclang/CXType.cpp
@@ -680,6 +680,7 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) {
TCALLINGCONV(PreserveAll);
TCALLINGCONV(M68kRTD);
TCALLINGCONV(PreserveNone);
+ TCALLINGCONV(RISCVVectorCall);
case CC_SpirFunction: return CXCallingConv_Unexposed;
case CC_AMDGPUKernelCall: return CXCallingConv_Unexposed;
case CC_OpenCLKernel: return CXCallingConv_Unexposed;
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index 8513174c88bf..5e41ef9f9d26 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -334,10 +334,6 @@ void RVVEmitter::createHeader(raw_ostream &OS) {
OS << "#include <stdint.h>\n";
OS << "#include <stddef.h>\n\n";
- OS << "#ifndef __riscv_vector\n";
- OS << "#error \"Vector intrinsics require the vector extension.\"\n";
- OS << "#endif\n\n";
-
OS << "#ifdef __cplusplus\n";
OS << "extern \"C\" {\n";
OS << "#endif\n\n";
diff --git a/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp b/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp
index e068c48fc97c..3e41f67ba922 100644
--- a/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp
@@ -141,7 +141,7 @@ TEST(ScudoStringsTest, CapacityIncreaseFails) {
// Test requires that the default length is at least 6 characters.
scudo::uptr MaxSize = Str.capacity();
- EXPECT_LE(6, MaxSize);
+ EXPECT_LE(6u, MaxSize);
for (size_t i = 0; i < MaxSize - 5; i++) {
Str.append("B");
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index c0c603feb296..d31d6a5c2062 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -51,7 +51,7 @@ public:
Fortran::semantics::SemanticsContext &semaCtx,
const Fortran::parser::OmpClauseList &clauses)
: converter(converter), semaCtx(semaCtx),
- clauses(makeList(clauses, semaCtx)) {}
+ clauses(makeClauses(clauses, semaCtx)) {}
// 'Unique' clauses: They can appear at most once in the clause list.
bool processCollapse(
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index f48e84f511a4..853dcd78e266 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -347,7 +347,7 @@ Aligned make(const parser::OmpClause::Aligned &inp,
return Aligned{{
/*Alignment=*/maybeApply(makeExprFn(semaCtx), t1),
- /*List=*/makeList(t0, semaCtx),
+ /*List=*/makeObjects(t0, semaCtx),
}};
}
@@ -362,7 +362,7 @@ Allocate make(const parser::OmpClause::Allocate &inp,
return Allocate{{/*AllocatorSimpleModifier=*/std::nullopt,
/*AllocatorComplexModifier=*/std::nullopt,
/*AlignModifier=*/std::nullopt,
- /*List=*/makeList(t1, semaCtx)}};
+ /*List=*/makeObjects(t1, semaCtx)}};
}
using Tuple = decltype(Allocate::t);
@@ -374,7 +374,7 @@ Allocate make(const parser::OmpClause::Allocate &inp,
return {/*AllocatorSimpleModifier=*/makeExpr(v.v, semaCtx),
/*AllocatorComplexModifier=*/std::nullopt,
/*AlignModifier=*/std::nullopt,
- /*List=*/makeList(t1, semaCtx)};
+ /*List=*/makeObjects(t1, semaCtx)};
},
// complex-modifier + align-modifier
[&](const wrapped::AllocateModifier::ComplexModifier &v) -> Tuple {
@@ -384,14 +384,14 @@ Allocate make(const parser::OmpClause::Allocate &inp,
/*AllocatorSimpleModifier=*/std::nullopt,
/*AllocatorComplexModifier=*/Allocator{makeExpr(s0.v, semaCtx)},
/*AlignModifier=*/Align{makeExpr(s1.v, semaCtx)},
- /*List=*/makeList(t1, semaCtx)};
+ /*List=*/makeObjects(t1, semaCtx)};
},
// align-modifier
[&](const wrapped::AllocateModifier::Align &v) -> Tuple {
return {/*AllocatorSimpleModifier=*/std::nullopt,
/*AllocatorComplexModifier=*/std::nullopt,
/*AlignModifier=*/Align{makeExpr(v.v, semaCtx)},
- /*List=*/makeList(t1, semaCtx)};
+ /*List=*/makeObjects(t1, semaCtx)};
},
},
t0->u)};
@@ -450,13 +450,13 @@ Collapse make(const parser::OmpClause::Collapse &inp,
Copyin make(const parser::OmpClause::Copyin &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpObjectList
- return Copyin{/*List=*/makeList(inp.v, semaCtx)};
+ return Copyin{/*List=*/makeObjects(inp.v, semaCtx)};
}
Copyprivate make(const parser::OmpClause::Copyprivate &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpObjectList
- return Copyprivate{/*List=*/makeList(inp.v, semaCtx)};
+ return Copyprivate{/*List=*/makeObjects(inp.v, semaCtx)};
}
Default make(const parser::OmpClause::Default &inp,
@@ -641,7 +641,7 @@ Doacross make(const parser::OmpClause::Doacross &inp,
Enter make(const parser::OmpClause::Enter &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpObjectList
- return Enter{makeList(/*List=*/inp.v, semaCtx)};
+ return Enter{makeObjects(/*List=*/inp.v, semaCtx)};
}
Exclusive make(const parser::OmpClause::Exclusive &inp,
@@ -671,7 +671,7 @@ Final make(const parser::OmpClause::Final &inp,
Firstprivate make(const parser::OmpClause::Firstprivate &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpObjectList
- return Firstprivate{/*List=*/makeList(inp.v, semaCtx)};
+ return Firstprivate{/*List=*/makeObjects(inp.v, semaCtx)};
}
// Flush: empty
@@ -681,7 +681,7 @@ From make(const parser::OmpClause::From &inp,
// inp.v -> parser::OmpObjectList
return From{{/*Expectation=*/std::nullopt, /*Mapper=*/std::nullopt,
/*Iterator=*/std::nullopt,
- /*LocatorList=*/makeList(inp.v, semaCtx)}};
+ /*LocatorList=*/makeObjects(inp.v, semaCtx)}};
}
// Full: empty
@@ -696,7 +696,7 @@ Grainsize make(const parser::OmpClause::Grainsize &inp,
HasDeviceAddr make(const parser::OmpClause::HasDeviceAddr &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpObjectList
- return HasDeviceAddr{/*List=*/makeList(inp.v, semaCtx)};
+ return HasDeviceAddr{/*List=*/makeObjects(inp.v, semaCtx)};
}
Hint make(const parser::OmpClause::Hint &inp,
@@ -762,20 +762,20 @@ InReduction make(const parser::OmpClause::InReduction &inp,
auto &t1 = std::get<parser::OmpObjectList>(inp.v.t);
return InReduction{
{/*ReductionIdentifiers=*/{makeReductionOperator(t0, semaCtx)},
- /*List=*/makeList(t1, semaCtx)}};
+ /*List=*/makeObjects(t1, semaCtx)}};
}
IsDevicePtr make(const parser::OmpClause::IsDevicePtr &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpObjectList
- return IsDevicePtr{/*List=*/makeList(inp.v, semaCtx)};
+ return IsDevicePtr{/*List=*/makeObjects(inp.v, semaCtx)};
}
Lastprivate make(const parser::OmpClause::Lastprivate &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpObjectList
return Lastprivate{{/*LastprivateModifier=*/std::nullopt,
- /*List=*/makeList(inp.v, semaCtx)}};
+ /*List=*/makeObjects(inp.v, semaCtx)}};
}
Linear make(const parser::OmpClause::Linear &inp,
@@ -817,7 +817,7 @@ Linear make(const parser::OmpClause::Linear &inp,
Link make(const parser::OmpClause::Link &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpObjectList
- return Link{/*List=*/makeList(inp.v, semaCtx)};
+ return Link{/*List=*/makeObjects(inp.v, semaCtx)};
}
Map make(const parser::OmpClause::Map &inp,
@@ -844,7 +844,7 @@ Map make(const parser::OmpClause::Map &inp,
if (!t0) {
return Map{{/*MapType=*/std::nullopt, /*MapTypeModifiers=*/std::nullopt,
/*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
- /*LocatorList=*/makeList(t1, semaCtx)}};
+ /*LocatorList=*/makeObjects(t1, semaCtx)}};
}
auto &s0 = std::get<std::optional<parser::OmpMapType::Always>>(t0->t);
@@ -857,7 +857,7 @@ Map make(const parser::OmpClause::Map &inp,
return Map{{/*MapType=*/convert1(s1),
/*MapTypeModifiers=*/maybeList,
/*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
- /*LocatorList=*/makeList(t1, semaCtx)}};
+ /*LocatorList=*/makeObjects(t1, semaCtx)}};
}
// Match: incomplete
@@ -980,7 +980,7 @@ Priority make(const parser::OmpClause::Priority &inp,
Private make(const parser::OmpClause::Private &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpObjectList
- return Private{/*List=*/makeList(inp.v, semaCtx)};
+ return Private{/*List=*/makeObjects(inp.v, semaCtx)};
}
ProcBind make(const parser::OmpClause::ProcBind &inp,
@@ -1010,7 +1010,7 @@ Reduction make(const parser::OmpClause::Reduction &inp,
return Reduction{
{/*ReductionIdentifiers=*/{makeReductionOperator(t0, semaCtx)},
/*ReductionModifier=*/std::nullopt,
- /*List=*/makeList(t1, semaCtx)}};
+ /*List=*/makeObjects(t1, semaCtx)}};
}
// Relaxed: empty
@@ -1104,7 +1104,7 @@ Severity make(const parser::OmpClause::Severity &inp,
Shared make(const parser::OmpClause::Shared &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpObjectList
- return Shared{/*List=*/makeList(inp.v, semaCtx)};
+ return Shared{/*List=*/makeObjects(inp.v, semaCtx)};
}
// Simd: empty
@@ -1128,7 +1128,7 @@ TaskReduction make(const parser::OmpClause::TaskReduction &inp,
auto &t1 = std::get<parser::OmpObjectList>(inp.v.t);
return TaskReduction{
{/*ReductionIdentifiers=*/{makeReductionOperator(t0, semaCtx)},
- /*List=*/makeList(t1, semaCtx)}};
+ /*List=*/makeObjects(t1, semaCtx)}};
}
ThreadLimit make(const parser::OmpClause::ThreadLimit &inp,
@@ -1145,7 +1145,7 @@ To make(const parser::OmpClause::To &inp,
// inp.v -> parser::OmpObjectList
return To{{/*Expectation=*/std::nullopt, /*Mapper=*/std::nullopt,
/*Iterator=*/std::nullopt,
- /*LocatorList=*/makeList(inp.v, semaCtx)}};
+ /*LocatorList=*/makeObjects(inp.v, semaCtx)}};
}
// UnifiedAddress: empty
@@ -1175,13 +1175,13 @@ Use make(const parser::OmpClause::Use &inp,
UseDeviceAddr make(const parser::OmpClause::UseDeviceAddr &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpObjectList
- return UseDeviceAddr{/*List=*/makeList(inp.v, semaCtx)};
+ return UseDeviceAddr{/*List=*/makeObjects(inp.v, semaCtx)};
}
UseDevicePtr make(const parser::OmpClause::UseDevicePtr &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpObjectList
- return UseDevicePtr{/*List=*/makeList(inp.v, semaCtx)};
+ return UseDevicePtr{/*List=*/makeObjects(inp.v, semaCtx)};
}
UsesAllocators make(const parser::OmpClause::UsesAllocators &inp,
@@ -1205,8 +1205,8 @@ Clause makeClause(const Fortran::parser::OmpClause &cls,
cls.u);
}
-List<Clause> makeList(const parser::OmpClauseList &clauses,
- semantics::SemanticsContext &semaCtx) {
+List<Clause> makeClauses(const parser::OmpClauseList &clauses,
+ semantics::SemanticsContext &semaCtx) {
return makeList(clauses.v, [&](const parser::OmpClause &s) {
return makeClause(s, semaCtx);
});
diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h
index af1318226e8c..3e776425c733 100644
--- a/flang/lib/Lower/OpenMP/Clauses.h
+++ b/flang/lib/Lower/OpenMP/Clauses.h
@@ -88,8 +88,8 @@ List<ResultTy> makeList(ContainerTy &&container, FunctionTy &&func) {
return v;
}
-inline ObjectList makeList(const parser::OmpObjectList &objects,
- semantics::SemanticsContext &semaCtx) {
+inline ObjectList makeObjects(const parser::OmpObjectList &objects,
+ semantics::SemanticsContext &semaCtx) {
return makeList(objects.v, makeObjectFn(semaCtx));
}
@@ -256,8 +256,8 @@ Clause makeClause(llvm::omp::Clause id, Specific &&specific,
Clause makeClause(const Fortran::parser::OmpClause &cls,
semantics::SemanticsContext &semaCtx);
-List<Clause> makeList(const parser::OmpClauseList &clauses,
- semantics::SemanticsContext &semaCtx);
+List<Clause> makeClauses(const parser::OmpClauseList &clauses,
+ semantics::SemanticsContext &semaCtx);
} // namespace Fortran::lower::omp
#endif // FORTRAN_LOWER_OPENMP_CLAUSES_H
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index 226abe96705e..1cbc825fd5e1 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -89,7 +89,7 @@ public:
Fortran::lower::SymMap *symTable = nullptr)
: hasLastPrivateOp(false), converter(converter),
firOpBuilder(converter.getFirOpBuilder()),
- clauses(omp::makeList(opClauseList, semaCtx)), eval(eval),
+ clauses(omp::makeClauses(opClauseList, semaCtx)), eval(eval),
useDelayedPrivatization(useDelayedPrivatization), symTable(symTable) {}
// Privatisation is split into two steps.
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 0cf2a8f97040..5defffd738b4 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1254,7 +1254,7 @@ static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo(
if (const auto *objectList{
Fortran::parser::Unwrap<Fortran::parser::OmpObjectList>(spec.u)}) {
- ObjectList objects{makeList(*objectList, semaCtx)};
+ ObjectList objects{makeObjects(*objectList, semaCtx)};
// Case: declare target(func, var1, var2)
gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to,
symbolAndClause);
@@ -2352,7 +2352,7 @@ void Fortran::lower::genOpenMPReduction(
const Fortran::parser::OmpClauseList &clauseList) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
- List<Clause> clauses{makeList(clauseList, semaCtx)};
+ List<Clause> clauses{makeClauses(clauseList, semaCtx)};
for (const Clause &clause : clauses) {
if (const auto &reductionClause =
diff --git a/libc/docs/dev/code_style.rst b/libc/docs/dev/code_style.rst
index e6fc6df5a0f6..22a18b7a4cc1 100644
--- a/libc/docs/dev/code_style.rst
+++ b/libc/docs/dev/code_style.rst
@@ -55,7 +55,7 @@ We define two kinds of macros:
* ``src/__support/macros/config.h`` - Important compiler and platform
features. Such macros can be used to produce portable code by
parameterizing compilation based on the presence or lack of a given
- feature. e.g., ``LIBC_HAS_BUILTIN``
+ feature. e.g., ``LIBC_HAS_FEATURE``
* ``src/__support/macros/attributes.h`` - Attributes for functions, types,
and variables. e.g., ``LIBC_UNUSED``
* ``src/__support/macros/optimization.h`` - Portable macros for performance
diff --git a/libc/src/__support/CPP/CMakeLists.txt b/libc/src/__support/CPP/CMakeLists.txt
index f76285be5219..84d01fe04516 100644
--- a/libc/src/__support/CPP/CMakeLists.txt
+++ b/libc/src/__support/CPP/CMakeLists.txt
@@ -18,7 +18,6 @@ add_header_library(
.limits
.type_traits
libc.src.__support.macros.attributes
- libc.src.__support.macros.config
libc.src.__support.macros.sanitizer
)
@@ -157,7 +156,6 @@ add_header_library(
DEPENDS
libc.include.llvm-libc-macros.stdfix_macros
libc.src.__support.macros.attributes
- libc.src.__support.macros.config
libc.src.__support.macros.properties.types
)
diff --git a/libc/src/__support/CPP/atomic.h b/libc/src/__support/CPP/atomic.h
index b74cb5981dba..5e428940565b 100644
--- a/libc/src/__support/CPP/atomic.h
+++ b/libc/src/__support/CPP/atomic.h
@@ -71,10 +71,11 @@ public:
T load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
[[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
- if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_load_n))
- return __scoped_atomic_load_n(&val, int(mem_ord), (int)(mem_scope));
- else
- return __atomic_load_n(&val, int(mem_ord));
+#if __has_builtin(__scoped_atomic_load_n)
+ return __scoped_atomic_load_n(&val, int(mem_ord), (int)(mem_scope));
+#else
+ return __atomic_load_n(&val, int(mem_ord));
+#endif
}
// Atomic store.
@@ -85,10 +86,11 @@ public:
void store(T rhs, MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
[[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
- if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_store_n))
- __scoped_atomic_store_n(&val, rhs, int(mem_ord), (int)(mem_scope));
- else
- __atomic_store_n(&val, rhs, int(mem_ord));
+#if __has_builtin(__scoped_atomic_store_n)
+ __scoped_atomic_store_n(&val, rhs, int(mem_ord), (int)(mem_scope));
+#else
+ __atomic_store_n(&val, rhs, int(mem_ord));
+#endif
}
// Atomic compare exchange
@@ -101,47 +103,51 @@ public:
T exchange(T desired, MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
[[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
- if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_exchange_n))
- return __scoped_atomic_exchange_n(&val, desired, int(mem_ord),
- (int)(mem_scope));
- else
- return __atomic_exchange_n(&val, desired, int(mem_ord));
+#if __has_builtin(__scoped_atomic_exchange_n)
+ return __scoped_atomic_exchange_n(&val, desired, int(mem_ord),
+ (int)(mem_scope));
+#else
+ return __atomic_exchange_n(&val, desired, int(mem_ord));
+#endif
}
T fetch_add(T increment, MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
[[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
- if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_fetch_add))
- return __scoped_atomic_fetch_add(&val, increment, int(mem_ord),
- (int)(mem_scope));
- else
- return __atomic_fetch_add(&val, increment, int(mem_ord));
+#if __has_builtin(__scoped_atomic_fetch_add)
+ return __scoped_atomic_fetch_add(&val, increment, int(mem_ord),
+ (int)(mem_scope));
+#else
+ return __atomic_fetch_add(&val, increment, int(mem_ord));
+#endif
}
T fetch_or(T mask, MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
[[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
- if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_fetch_or))
- return __scoped_atomic_fetch_or(&val, mask, int(mem_ord),
- (int)(mem_scope));
- else
- return __atomic_fetch_or(&val, mask, int(mem_ord));
+#if __has_builtin(__scoped_atomic_fetch_or)
+ return __scoped_atomic_fetch_or(&val, mask, int(mem_ord), (int)(mem_scope));
+#else
+ return __atomic_fetch_or(&val, mask, int(mem_ord));
+#endif
}
T fetch_and(T mask, MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
[[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
- if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_fetch_and))
- return __scoped_atomic_fetch_and(&val, mask, int(mem_ord),
- (int)(mem_scope));
- else
- return __atomic_fetch_and(&val, mask, int(mem_ord));
+#if __has_builtin(__scoped_atomic_fetch_and)
+ return __scoped_atomic_fetch_and(&val, mask, int(mem_ord),
+ (int)(mem_scope));
+#else
+ return __atomic_fetch_and(&val, mask, int(mem_ord));
+#endif
}
T fetch_sub(T decrement, MemoryOrder mem_ord = MemoryOrder::SEQ_CST,
[[maybe_unused]] MemoryScope mem_scope = MemoryScope::DEVICE) {
- if constexpr (LIBC_HAS_BUILTIN(__scoped_atomic_fetch_sub))
- return __scoped_atomic_fetch_sub(&val, decrement, int(mem_ord),
- (int)(mem_scope));
- else
- return __atomic_fetch_sub(&val, decrement, int(mem_ord));
+#if __has_builtin(__scoped_atomic_fetch_sub)
+ return __scoped_atomic_fetch_sub(&val, decrement, int(mem_ord),
+ (int)(mem_scope));
+#else
+ return __atomic_fetch_sub(&val, decrement, int(mem_ord));
+#endif
}
// Set the value without using an atomic operation. This is useful
@@ -166,7 +172,7 @@ LIBC_INLINE void atomic_thread_fence([[maybe_unused]] MemoryOrder mem_ord) {
// except no instructions for memory ordering are issued. Only reordering of
// the instructions by the compiler is suppressed as order instructs.
LIBC_INLINE void atomic_signal_fence([[maybe_unused]] MemoryOrder mem_ord) {
-#if LIBC_HAS_BUILTIN(__atomic_signal_fence)
+#if __has_builtin(__atomic_signal_fence)
__atomic_signal_fence(static_cast<int>(mem_ord));
#else
// if the builtin is not ready, use asm as a full compiler barrier.
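
Aside on the atomic.h hunks above: the patch moves from
if constexpr (LIBC_HAS_BUILTIN(...)) to preprocessor guards, so a compiler without the
scoped builtin never has to parse the call. A minimal sketch of that guard pattern,
assuming a Clang/GCC-style compiler; the function name and the plain-int scope
parameter are illustrative, not the library's API.

// Fallback definition for compilers that lack the __has_builtin probe.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

template <typename T>
T relaxed_load(T *ptr, [[maybe_unused]] int scope) {
#if __has_builtin(__scoped_atomic_load_n)
  // The scoped builtin only exists on newer compilers; the guard removes
  // this branch before parsing when it is unavailable.
  return __scoped_atomic_load_n(ptr, __ATOMIC_RELAXED, scope);
#else
  return __atomic_load_n(ptr, __ATOMIC_RELAXED);
#endif
}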
diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h
index 3f2fbec94405..80f50fd221ef 100644
--- a/libc/src/__support/CPP/bit.h
+++ b/libc/src/__support/CPP/bit.h
@@ -14,14 +14,13 @@
#include "src/__support/CPP/limits.h" // numeric_limits
#include "src/__support/CPP/type_traits.h"
#include "src/__support/macros/attributes.h"
-#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN
#include "src/__support/macros/sanitizer.h"
#include <stdint.h>
namespace LIBC_NAMESPACE::cpp {
-#if LIBC_HAS_BUILTIN(__builtin_memcpy_inline)
+#if __has_builtin(__builtin_memcpy_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
#endif
@@ -36,20 +35,20 @@ LIBC_INLINE constexpr cpp::enable_if_t<
To>
bit_cast(const From &from) {
MSAN_UNPOISON(&from, sizeof(From));
-#if LIBC_HAS_BUILTIN(__builtin_bit_cast)
+#if __has_builtin(__builtin_bit_cast)
return __builtin_bit_cast(To, from);
#else
To to;
char *dst = reinterpret_cast<char *>(&to);
const char *src = reinterpret_cast<const char *>(&from);
-#if LIBC_HAS_BUILTIN(__builtin_memcpy_inline)
+#if __has_builtin(__builtin_memcpy_inline)
__builtin_memcpy_inline(dst, src, sizeof(To));
#else
for (unsigned i = 0; i < sizeof(To); ++i)
dst[i] = src[i];
-#endif // LIBC_HAS_BUILTIN(__builtin_memcpy_inline)
+#endif // __has_builtin(__builtin_memcpy_inline)
return to;
-#endif // LIBC_HAS_BUILTIN(__builtin_bit_cast)
+#endif // __has_builtin(__builtin_bit_cast)
}
template <typename T>
@@ -94,7 +93,7 @@ countr_zero(T value) {
}
return zero_bits;
}
-#if LIBC_HAS_BUILTIN(__builtin_ctzs)
+#if __has_builtin(__builtin_ctzs)
ADD_SPECIALIZATION(countr_zero, unsigned short, __builtin_ctzs)
#endif
ADD_SPECIALIZATION(countr_zero, unsigned int, __builtin_ctz)
@@ -124,7 +123,7 @@ countl_zero(T value) {
}
return zero_bits;
}
-#if LIBC_HAS_BUILTIN(__builtin_clzs)
+#if __has_builtin(__builtin_clzs)
ADD_SPECIALIZATION(countl_zero, unsigned short, __builtin_clzs)
#endif
ADD_SPECIALIZATION(countl_zero, unsigned int, __builtin_clz)
@@ -242,6 +241,14 @@ LIBC_INLINE constexpr To bit_or_static_cast(const From &from) {
/// Count number of 1's aka population count or Hamming weight.
///
/// Only unsigned integral types are allowed.
+// __builtin_popcountg is available in clang 19+ and gcc 14+.
+#if __has_builtin(__builtin_popcountg)
+template <typename T>
+[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
+popcount(T value) {
+ return __builtin_popcountg(value);
+}
+#else // !__has_builtin(__builtin_popcountg)
template <typename T>
[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
popcount(T value) {
@@ -261,7 +268,7 @@ ADD_SPECIALIZATION(unsigned short, __builtin_popcount)
ADD_SPECIALIZATION(unsigned, __builtin_popcount)
ADD_SPECIALIZATION(unsigned long, __builtin_popcountl)
ADD_SPECIALIZATION(unsigned long long, __builtin_popcountll)
-// TODO: 128b specializations?
+#endif // __builtin_popcountg
#undef ADD_SPECIALIZATION
} // namespace LIBC_NAMESPACE::cpp
diff --git a/libc/src/__support/CPP/type_traits/add_pointer.h b/libc/src/__support/CPP/type_traits/add_pointer.h
index 72a764bb8ba6..1257033ee80e 100644
--- a/libc/src/__support/CPP/type_traits/add_pointer.h
+++ b/libc/src/__support/CPP/type_traits/add_pointer.h
@@ -10,7 +10,6 @@
#include "src/__support/CPP/type_traits/remove_reference.h"
#include "src/__support/CPP/type_traits/type_identity.h"
-#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE::cpp {
diff --git a/libc/src/__support/CPP/type_traits/decay.h b/libc/src/__support/CPP/type_traits/decay.h
index a018286fddd8..f1a1200ab2ba 100644
--- a/libc/src/__support/CPP/type_traits/decay.h
+++ b/libc/src/__support/CPP/type_traits/decay.h
@@ -9,7 +9,6 @@
#define LLVM_LIBC_SRC___SUPPORT_CPP_TYPE_TRAITS_DECAY_H
#include "src/__support/macros/attributes.h"
-#include "src/__support/macros/config.h"
#include "src/__support/CPP/type_traits/add_pointer.h"
#include "src/__support/CPP/type_traits/conditional.h"
diff --git a/libc/src/__support/CPP/type_traits/is_destructible.h b/libc/src/__support/CPP/type_traits/is_destructible.h
index d47de1cc797b..f94fe309ac8f 100644
--- a/libc/src/__support/CPP/type_traits/is_destructible.h
+++ b/libc/src/__support/CPP/type_traits/is_destructible.h
@@ -16,12 +16,11 @@
#include "src/__support/CPP/type_traits/true_type.h"
#include "src/__support/CPP/type_traits/type_identity.h"
#include "src/__support/macros/attributes.h"
-#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE::cpp {
// is_destructible
-#if LIBC_HAS_BUILTIN(__is_destructible)
+#if __has_builtin(__is_destructible)
template <typename T>
struct is_destructible : bool_constant<__is_destructible(T)> {};
#else
diff --git a/libc/src/__support/CPP/type_traits/is_function.h b/libc/src/__support/CPP/type_traits/is_function.h
index 557b3224484b..0eba5860ad60 100644
--- a/libc/src/__support/CPP/type_traits/is_function.h
+++ b/libc/src/__support/CPP/type_traits/is_function.h
@@ -12,12 +12,11 @@
#include "src/__support/CPP/type_traits/is_const.h"
#include "src/__support/CPP/type_traits/is_reference.h"
#include "src/__support/macros/attributes.h"
-#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE::cpp {
// is_function
-#if LIBC_HAS_BUILTIN(__is_function)
+#if __has_builtin(__is_function)
template <typename T>
struct is_function : integral_constant<bool, __is_function(T)> {};
#else
diff --git a/libc/src/__support/CPP/type_traits/is_lvalue_reference.h b/libc/src/__support/CPP/type_traits/is_lvalue_reference.h
index f52e303afad2..1dff57f186a3 100644
--- a/libc/src/__support/CPP/type_traits/is_lvalue_reference.h
+++ b/libc/src/__support/CPP/type_traits/is_lvalue_reference.h
@@ -12,12 +12,11 @@
#include "src/__support/CPP/type_traits/false_type.h"
#include "src/__support/CPP/type_traits/true_type.h"
#include "src/__support/macros/attributes.h"
-#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE::cpp {
// is_lvalue_reference
-#if LIBC_HAS_BUILTIN(__is_lvalue_reference)
+#if __has_builtin(__is_lvalue_reference)
template <typename T>
struct is_lvalue_reference : bool_constant<__is_lvalue_reference(T)> {};
#else
diff --git a/libc/src/__support/CPP/type_traits/is_reference.h b/libc/src/__support/CPP/type_traits/is_reference.h
index c017028edf41..bbfb2b7359c3 100644
--- a/libc/src/__support/CPP/type_traits/is_reference.h
+++ b/libc/src/__support/CPP/type_traits/is_reference.h
@@ -12,12 +12,11 @@
#include "src/__support/CPP/type_traits/false_type.h"
#include "src/__support/CPP/type_traits/true_type.h"
#include "src/__support/macros/attributes.h"
-#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE::cpp {
// is_reference
-#if LIBC_HAS_BUILTIN(__is_reference)
+#if __has_builtin(__is_reference)
template <typename T> struct is_reference : bool_constant<__is_reference(T)> {};
#else
template <typename T> struct is_reference : public false_type {};
diff --git a/libc/src/__support/CPP/type_traits/is_rvalue_reference.h b/libc/src/__support/CPP/type_traits/is_rvalue_reference.h
index f0487e41c998..3efbbe6b033a 100644
--- a/libc/src/__support/CPP/type_traits/is_rvalue_reference.h
+++ b/libc/src/__support/CPP/type_traits/is_rvalue_reference.h
@@ -12,12 +12,11 @@
#include "src/__support/CPP/type_traits/false_type.h"
#include "src/__support/CPP/type_traits/true_type.h"
#include "src/__support/macros/attributes.h"
-#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE::cpp {
// is_rvalue_reference
-#if LIBC_HAS_BUILTIN(__is_rvalue_reference)
+#if __has_builtin(__is_rvalue_reference)
template <typename T>
struct is_rvalue_reference : bool_constant<__is_rvalue_reference(T)> {};
#else
diff --git a/libc/src/__support/CPP/type_traits/is_trivially_copyable.h b/libc/src/__support/CPP/type_traits/is_trivially_copyable.h
index 0c3fdcc711d5..b4c825d57961 100644
--- a/libc/src/__support/CPP/type_traits/is_trivially_copyable.h
+++ b/libc/src/__support/CPP/type_traits/is_trivially_copyable.h
@@ -9,7 +9,6 @@
#define LLVM_LIBC_SRC___SUPPORT_CPP_TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_H
#include "src/__support/CPP/type_traits/integral_constant.h"
-#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE::cpp {
diff --git a/libc/src/__support/CPP/type_traits/is_trivially_destructible.h b/libc/src/__support/CPP/type_traits/is_trivially_destructible.h
index 3345149433af..37e0e869266e 100644
--- a/libc/src/__support/CPP/type_traits/is_trivially_destructible.h
+++ b/libc/src/__support/CPP/type_traits/is_trivially_destructible.h
@@ -11,12 +11,11 @@
#include "src/__support/CPP/type_traits/bool_constant.h"
#include "src/__support/CPP/type_traits/is_destructible.h"
#include "src/__support/macros/attributes.h"
-#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE::cpp {
// is_trivially_destructible
-#if LIBC_HAS_BUILTIN(__is_trivially_destructible)
+#if __has_builtin(__is_trivially_destructible)
template <typename T>
struct is_trivially_destructible
: public bool_constant<__is_trivially_destructible(T)> {};
@@ -25,7 +24,7 @@ template <typename T>
struct is_trivially_destructible
: public bool_constant<cpp::is_destructible_v<T> &&__has_trivial_destructor(
T)> {};
-#endif // LIBC_HAS_BUILTIN(__is_trivially_destructible)
+#endif // __has_builtin(__is_trivially_destructible)
template <typename T>
LIBC_INLINE_VAR constexpr bool is_trivially_destructible_v =
is_trivially_destructible<T>::value;
diff --git a/libc/src/__support/CPP/type_traits/remove_all_extents.h b/libc/src/__support/CPP/type_traits/remove_all_extents.h
index bff6341d3e45..5941b82bbc16 100644
--- a/libc/src/__support/CPP/type_traits/remove_all_extents.h
+++ b/libc/src/__support/CPP/type_traits/remove_all_extents.h
@@ -9,14 +9,13 @@
#define LLVM_LIBC_SRC___SUPPORT_CPP_TYPE_TRAITS_REMOVE_ALL_EXTENTS_H
#include "src/__support/CPP/type_traits/type_identity.h"
-#include "src/__support/macros/config.h"
#include <stddef.h> // size_t
namespace LIBC_NAMESPACE::cpp {
// remove_all_extents
-#if LIBC_HAS_BUILTIN(__remove_all_extents)
+#if __has_builtin(__remove_all_extents)
template <typename T> using remove_all_extents_t = __remove_all_extents(T);
template <typename T>
struct remove_all_extents : cpp::type_identity<remove_all_extents_t<T>> {};
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 0f4350234197..ff155a19758d 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -6,7 +6,6 @@ add_header_library(
libc.include.fenv
libc.include.math
libc.src.__support.macros.attributes
- libc.src.__support.macros.config
libc.src.errno.errno
)
diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h
index a6a533dcfdf4..6086d5d3de2d 100644
--- a/libc/src/__support/FPUtil/FEnvImpl.h
+++ b/libc/src/__support/FPUtil/FEnvImpl.h
@@ -11,7 +11,6 @@
#include "include/llvm-libc-macros/math-macros.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
-#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN
#include "src/__support/macros/properties/architectures.h"
#include "src/errno/libc_errno.h"
#include <fenv.h>
diff --git a/libc/src/__support/FPUtil/gpu/FMA.h b/libc/src/__support/FPUtil/gpu/FMA.h
index 86bc86031496..ef1cd26a72dd 100644
--- a/libc/src/__support/FPUtil/gpu/FMA.h
+++ b/libc/src/__support/FPUtil/gpu/FMA.h
@@ -10,12 +10,12 @@
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GPU_FMA_H
#include "src/__support/CPP/type_traits.h"
-#include "src/__support/macros/config.h"
-// These intrinsics map to the FMA instrunctions in the target ISA for the GPU.
+// These intrinsics map to the FMA instructions in the target ISA for the GPU.
// The default rounding mode generated from these will be to the nearest even.
-static_assert(LIBC_HAS_BUILTIN(__builtin_fma), "FMA builtins must be defined");
-static_assert(LIBC_HAS_BUILTIN(__builtin_fmaf), "FMA builtins must be defined");
+#if !__has_builtin(__builtin_fma) || !__has_builtin(__builtin_fmaf)
+#error "FMA builtins must be defined");
+#endif
namespace LIBC_NAMESPACE {
namespace fputil {
diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h
index df01e081e3c1..282efdba1c5f 100644
--- a/libc/src/__support/UInt.h
+++ b/libc/src/__support/UInt.h
@@ -1082,6 +1082,17 @@ bit_cast(const UInt<Bits> &from) {
return cpp::bit_cast<To>(from.val);
}
+// Specialization of cpp::popcount ('bit.h') for BigInt.
+template <typename T>
+[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int>
+popcount(T value) {
+ int bits = 0;
+ for (auto word : value.val)
+ if (word)
+ bits += popcount(word);
+ return bits;
+}
+
// Specialization of cpp::has_single_bit ('bit.h') for BigInt.
template <typename T>
[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, bool>
@@ -1218,6 +1229,49 @@ LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, T> mask_leading_ones() {
return out;
}
+// Specialization of count_zeros ('math_extras.h') for BigInt.
+template <typename T>
+[[nodiscard]]
+LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int>
+count_zeros(T value) {
+ return cpp::popcount(~value);
+}
+
+// Specialization of first_leading_zero ('math_extras.h') for BigInt.
+template <typename T>
+[[nodiscard]]
+LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int>
+first_leading_zero(T value) {
+ return value == cpp::numeric_limits<T>::max() ? 0
+ : cpp::countl_one(value) + 1;
+}
+
+// Specialization of first_leading_one ('math_extras.h') for BigInt.
+template <typename T>
+[[nodiscard]]
+LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int>
+first_leading_one(T value) {
+ return first_leading_zero(~value);
+}
+
+// Specialization of first_trailing_zero ('math_extras.h') for BigInt.
+template <typename T>
+[[nodiscard]]
+LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int>
+first_trailing_zero(T value) {
+ return value == cpp::numeric_limits<T>::max() ? 0
+ : cpp::countr_zero(~value) + 1;
+}
+
+// Specialization of first_trailing_one ('math_extras.h') for BigInt.
+template <typename T>
+[[nodiscard]]
+LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int>
+first_trailing_one(T value) {
+ return value == cpp::numeric_limits<T>::max() ? 0
+ : cpp::countr_zero(value) + 1;
+}
+
} // namespace LIBC_NAMESPACE
#endif // LLVM_LIBC_SRC___SUPPORT_UINT_H
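
Aside on the UInt.h hunk above: the new BigInt popcount specialization simply sums
per-limb counts. A tiny self-contained sketch of the same idea over a fixed two-limb
array (the helper name and the checked value are illustrative only).

#include <array>
#include <cstdint>

constexpr int popcount_words(const std::array<uint64_t, 2> &words) {
  int bits = 0;
  for (uint64_t w : words)
    bits += __builtin_popcountll(w); // count each limb, sum across limbs
  return bits;
}

// 0xFF contributes 8 set bits, 0x1 contributes 1.
static_assert(popcount_words({0xFFULL, 0x1ULL}) == 9);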
diff --git a/libc/src/__support/macros/config.h b/libc/src/__support/macros/config.h
index 6666c1366696..3f200f0d62ba 100644
--- a/libc/src/__support/macros/config.h
+++ b/libc/src/__support/macros/config.h
@@ -13,24 +13,6 @@
#ifndef LLVM_LIBC_SRC___SUPPORT_MACROS_CONFIG_H
#define LLVM_LIBC_SRC___SUPPORT_MACROS_CONFIG_H
-// LIBC_HAS_BUILTIN()
-//
-// Checks whether the compiler supports a Clang Feature Checking Macro, and if
-// so, checks whether it supports the provided builtin function "x" where x
-// is one of the functions noted in
-// https://clang.llvm.org/docs/LanguageExtensions.html
-//
-// Note: Use this macro to avoid an extra level of #ifdef __has_builtin check.
-// http://releases.llvm.org/3.3/tools/clang/docs/LanguageExtensions.html
-
-// Compiler builtin-detection.
-// clang.llvm.org/docs/LanguageExtensions.html#has-builtin
-#ifdef __has_builtin
-#define LIBC_HAS_BUILTIN(x) __has_builtin(x)
-#else
-#define LIBC_HAS_BUILTIN(x) 0
-#endif
-
// Compiler feature-detection.
// clang.llvm.org/docs/LanguageExtensions.html#has-feature-and-has-extension
#ifdef __has_feature
diff --git a/libc/src/__support/macros/optimization.h b/libc/src/__support/macros/optimization.h
index ae97efcaa417..59886ca44be1 100644
--- a/libc/src/__support/macros/optimization.h
+++ b/libc/src/__support/macros/optimization.h
@@ -11,7 +11,6 @@
#define LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H
#include "src/__support/macros/attributes.h" // LIBC_INLINE
-#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN
#include "src/__support/macros/properties/compiler.h" // LIBC_COMPILER_IS_CLANG
// We use a template to implement likely/unlikely to make sure that we don't
diff --git a/libc/src/__support/macros/sanitizer.h b/libc/src/__support/macros/sanitizer.h
index fc66c2005c42..bd9b62b7121a 100644
--- a/libc/src/__support/macros/sanitizer.h
+++ b/libc/src/__support/macros/sanitizer.h
@@ -47,8 +47,7 @@
// Functions to unpoison memory
//-----------------------------------------------------------------------------
-#if defined(LIBC_HAVE_MEMORY_SANITIZER) && \
- LIBC_HAS_BUILTIN(__builtin_constant_p)
+#if defined(LIBC_HAVE_MEMORY_SANITIZER) && __has_builtin(__builtin_constant_p)
// Only perform MSAN unpoison in non-constexpr context.
#include <sanitizer/msan_interface.h>
#define MSAN_UNPOISON(addr, size) \
diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h
index 28ee1be8b999..70a8800b285d 100644
--- a/libc/src/__support/math_extras.h
+++ b/libc/src/__support/math_extras.h
@@ -14,7 +14,6 @@
#include "src/__support/CPP/limits.h" // CHAR_BIT, numeric_limits
#include "src/__support/CPP/type_traits.h" // is_unsigned_v
#include "src/__support/macros/attributes.h" // LIBC_INLINE
-#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN
namespace LIBC_NAMESPACE {
@@ -61,7 +60,7 @@ add_with_carry(T a, T b, T carry_in) {
return add_with_carry_const<T>(a, b, carry_in);
}
-#if LIBC_HAS_BUILTIN(__builtin_addc)
+#if __has_builtin(__builtin_addc)
// https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins
template <>
@@ -129,7 +128,7 @@ add_with_carry<unsigned long long>(unsigned long long a, unsigned long long b,
}
}
-#endif // LIBC_HAS_BUILTIN(__builtin_addc)
+#endif // __has_builtin(__builtin_addc)
// Subtract with borrow
template <typename T> struct DiffBorrow {
@@ -157,7 +156,7 @@ sub_with_borrow(T a, T b, T borrow_in) {
return sub_with_borrow_const<T>(a, b, borrow_in);
}
-#if LIBC_HAS_BUILTIN(__builtin_subc)
+#if __has_builtin(__builtin_subc)
// https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins
template <>
@@ -225,7 +224,7 @@ sub_with_borrow<unsigned long long>(unsigned long long a, unsigned long long b,
}
}
-#endif // LIBC_HAS_BUILTIN(__builtin_subc)
+#endif // __has_builtin(__builtin_subc)
template <typename T>
[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
diff --git a/libc/src/__support/memory_size.h b/libc/src/__support/memory_size.h
index 7bd16a1695be..491123bbabf3 100644
--- a/libc/src/__support/memory_size.h
+++ b/libc/src/__support/memory_size.h
@@ -19,7 +19,7 @@
namespace LIBC_NAMESPACE {
namespace internal {
template <class T> LIBC_INLINE bool mul_overflow(T a, T b, T *res) {
-#if LIBC_HAS_BUILTIN(__builtin_mul_overflow)
+#if __has_builtin(__builtin_mul_overflow)
return __builtin_mul_overflow(a, b, res);
#else
T max = cpp::numeric_limits<T>::max();
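
Aside on the memory_size.h hunk above, whose fallback branch is truncated in this
excerpt: a generic sketch of the guarded overflow-checked multiply, assuming a compiler
that provides __has_builtin (the rpc_server.cpp hunk later in this diff shows the usual
#ifndef fallback). The helper name and the manual division check are stand-ins, not the
library's code.

#include <cstdint>

inline bool mul_overflows_u64(uint64_t a, uint64_t b, uint64_t *res) {
#if __has_builtin(__builtin_mul_overflow)
  return __builtin_mul_overflow(a, b, res); // true when the product wraps
#else
  *res = a * b;
  return b != 0 && *res / b != a; // generic wraparound check for unsigned
#endif
}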
diff --git a/libc/src/string/memory_utils/generic/builtin.h b/libc/src/string/memory_utils/generic/builtin.h
index 5239329f653b..ba4f4b898408 100644
--- a/libc/src/string/memory_utils/generic/builtin.h
+++ b/libc/src/string/memory_utils/generic/builtin.h
@@ -10,16 +10,16 @@
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_BUILTIN_H
#include "src/__support/macros/attributes.h" // LIBC_INLINE
-#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN
#include "src/string/memory_utils/utils.h" // Ptr, CPtr
#include <stddef.h> // size_t
namespace LIBC_NAMESPACE {
-static_assert(LIBC_HAS_BUILTIN(__builtin_memcpy), "Builtin not defined");
-static_assert(LIBC_HAS_BUILTIN(__builtin_memset), "Builtin not defined");
-static_assert(LIBC_HAS_BUILTIN(__builtin_memmove), "Builtin not defined");
+#if !__has_builtin(__builtin_memcpy) || !__has_builtin(__builtin_memset) || \
+ !__has_builtin(__builtin_memmove)
+#error "Builtin not defined");
+#endif
[[maybe_unused]] LIBC_INLINE void
inline_memcpy_builtin(Ptr dst, CPtr src, size_t count, size_t offset = 0) {
diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h
index 79526d19c6b3..b3e1a26ad996 100644
--- a/libc/src/string/memory_utils/utils.h
+++ b/libc/src/string/memory_utils/utils.h
@@ -14,7 +14,6 @@
#include "src/__support/CPP/type_traits.h"
#include "src/__support/endian.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
-#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN
#include "src/__support/macros/properties/architectures.h"
#include <stddef.h> // size_t
@@ -71,11 +70,11 @@ LIBC_INLINE bool is_disjoint(const void *p1, const void *p2, size_t size) {
return sdiff >= 0 ? size <= udiff : size <= neg_udiff;
}
-#if LIBC_HAS_BUILTIN(__builtin_memcpy_inline)
+#if __has_builtin(__builtin_memcpy_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
#endif
-#if LIBC_HAS_BUILTIN(__builtin_memset_inline)
+#if __has_builtin(__builtin_memset_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMSET_INLINE
#endif
diff --git a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp
index cee5b90c8f4b..875b47e6a198 100644
--- a/libc/test/src/__support/CPP/bit_test.cpp
+++ b/libc/test/src/__support/CPP/bit_test.cpp
@@ -15,13 +15,6 @@
namespace LIBC_NAMESPACE::cpp {
-using UnsignedTypesNoBigInt = testing::TypeList<
-#if defined(LIBC_TYPES_HAS_INT128)
- __uint128_t,
-#endif // LIBC_TYPES_HAS_INT128
- unsigned char, unsigned short, unsigned int, unsigned long,
- unsigned long long>;
-
using UnsignedTypes = testing::TypeList<
#if defined(LIBC_TYPES_HAS_INT128)
__uint128_t,
@@ -228,7 +221,7 @@ TEST(LlvmLibcBitTest, Rotr) {
rotr<uint64_t>(0x12345678deadbeefULL, -19));
}
-TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypesNoBigInt) {
+TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypes) {
EXPECT_EQ(popcount(T(0)), 0);
for (int i = 0; i != cpp::numeric_limits<T>::digits; ++i)
EXPECT_EQ(popcount<T>(cpp::numeric_limits<T>::max() >> i),
diff --git a/libc/test/src/__support/math_extras_test.cpp b/libc/test/src/__support/math_extras_test.cpp
index e642248881a4..e88b3e1d6b68 100644
--- a/libc/test/src/__support/math_extras_test.cpp
+++ b/libc/test/src/__support/math_extras_test.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "src/__support/UInt128.h" // UInt128
+#include "src/__support/UInt128.h" // UInt<128>
#include "src/__support/integer_literals.h"
#include "src/__support/math_extras.h"
#include "test/UnitTest/Test.h"
@@ -19,7 +19,7 @@ using UnsignedTypesNoBigInt = testing::TypeList<
__uint128_t,
#endif // LIBC_TYPES_HAS_INT128
unsigned char, unsigned short, unsigned int, unsigned long,
- unsigned long long>;
+ unsigned long long, UInt<128>>;
TEST(LlvmLibcBlockMathExtrasTest, mask_trailing_ones) {
EXPECT_EQ(0_u8, (mask_leading_ones<uint8_t, 0>()));
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index 90af1569c4c5..46ad98fa02cc 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -6,6 +6,11 @@
//
//===----------------------------------------------------------------------===//
+// Workaround for missing __has_builtin in < GCC 10.
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
#include "llvmlibc_rpc_server.h"
#include "src/__support/RPC/rpc.h"
diff --git a/libcxx/include/__algorithm/copy.h b/libcxx/include/__algorithm/copy.h
index 4c3815405af0..0890b895f540 100644
--- a/libcxx/include/__algorithm/copy.h
+++ b/libcxx/include/__algorithm/copy.h
@@ -32,7 +32,7 @@ template <class, class _InIter, class _Sent, class _OutIter>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter);
template <class _AlgPolicy>
-struct __copy_loop {
+struct __copy_impl {
template <class _InIter, class _Sent, class _OutIter>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
operator()(_InIter __first, _Sent __last, _OutIter __result) const {
@@ -94,9 +94,7 @@ struct __copy_loop {
__local_first = _Traits::__begin(++__segment_iterator);
}
}
-};
-struct __copy_trivial {
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
@@ -108,7 +106,7 @@ struct __copy_trivial {
template <class _AlgPolicy, class _InIter, class _Sent, class _OutIter>
pair<_InIter, _OutIter> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
__copy(_InIter __first, _Sent __last, _OutIter __result) {
- return std::__dispatch_copy_or_move<_AlgPolicy, __copy_loop<_AlgPolicy>, __copy_trivial>(
+ return std::__copy_move_unwrap_iters<__copy_impl<_AlgPolicy> >(
std::move(__first), std::move(__last), std::move(__result));
}
diff --git a/libcxx/include/__algorithm/copy_backward.h b/libcxx/include/__algorithm/copy_backward.h
index 591dd21e2b03..73dc846a975a 100644
--- a/libcxx/include/__algorithm/copy_backward.h
+++ b/libcxx/include/__algorithm/copy_backward.h
@@ -33,7 +33,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter, _OutIter>
__copy_backward(_InIter __first, _Sent __last, _OutIter __result);
template <class _AlgPolicy>
-struct __copy_backward_loop {
+struct __copy_backward_impl {
template <class _InIter, class _Sent, class _OutIter>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
operator()(_InIter __first, _Sent __last, _OutIter __result) const {
@@ -104,9 +104,7 @@ struct __copy_backward_loop {
__local_last = _Traits::__end(__segment_iterator);
}
}
-};
-struct __copy_backward_trivial {
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
@@ -118,7 +116,7 @@ struct __copy_backward_trivial {
template <class _AlgPolicy, class _BidirectionalIterator1, class _Sentinel, class _BidirectionalIterator2>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_BidirectionalIterator1, _BidirectionalIterator2>
__copy_backward(_BidirectionalIterator1 __first, _Sentinel __last, _BidirectionalIterator2 __result) {
- return std::__dispatch_copy_or_move<_AlgPolicy, __copy_backward_loop<_AlgPolicy>, __copy_backward_trivial>(
+ return std::__copy_move_unwrap_iters<__copy_backward_impl<_AlgPolicy> >(
std::move(__first), std::move(__last), std::move(__result));
}
diff --git a/libcxx/include/__algorithm/copy_move_common.h b/libcxx/include/__algorithm/copy_move_common.h
index 845967b05038..12a26c6d6a64 100644
--- a/libcxx/include/__algorithm/copy_move_common.h
+++ b/libcxx/include/__algorithm/copy_move_common.h
@@ -81,30 +81,17 @@ __copy_backward_trivial_impl(_In* __first, _In* __last, _Out* __result) {
// Iterator unwrapping and dispatching to the correct overload.
-template <class _F1, class _F2>
-struct __overload : _F1, _F2 {
- using _F1::operator();
- using _F2::operator();
-};
-
-template <class _InIter, class _Sent, class _OutIter, class = void>
-struct __can_rewrap : false_type {};
-
-template <class _InIter, class _Sent, class _OutIter>
-struct __can_rewrap<_InIter,
- _Sent,
- _OutIter,
- // Note that sentinels are always copy-constructible.
- __enable_if_t< is_copy_constructible<_InIter>::value && is_copy_constructible<_OutIter>::value > >
- : true_type {};
+template <class _InIter, class _OutIter>
+struct __can_rewrap
+ : integral_constant<bool, is_copy_constructible<_InIter>::value && is_copy_constructible<_OutIter>::value> {};
template <class _Algorithm,
class _InIter,
class _Sent,
class _OutIter,
- __enable_if_t<__can_rewrap<_InIter, _Sent, _OutIter>::value, int> = 0>
+ __enable_if_t<__can_rewrap<_InIter, _OutIter>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter>
-__unwrap_and_dispatch(_InIter __first, _Sent __last, _OutIter __out_first) {
+__copy_move_unwrap_iters(_InIter __first, _Sent __last, _OutIter __out_first) {
auto __range = std::__unwrap_range(__first, std::move(__last));
auto __result = _Algorithm()(std::move(__range.first), std::move(__range.second), std::__unwrap_iter(__out_first));
return std::make_pair(std::__rewrap_range<_Sent>(std::move(__first), std::move(__result.first)),
@@ -115,24 +102,12 @@ template <class _Algorithm,
class _InIter,
class _Sent,
class _OutIter,
- __enable_if_t<!__can_rewrap<_InIter, _Sent, _OutIter>::value, int> = 0>
+ __enable_if_t<!__can_rewrap<_InIter, _OutIter>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter>
-__unwrap_and_dispatch(_InIter __first, _Sent __last, _OutIter __out_first) {
+__copy_move_unwrap_iters(_InIter __first, _Sent __last, _OutIter __out_first) {
return _Algorithm()(std::move(__first), std::move(__last), std::move(__out_first));
}
-template <class _AlgPolicy,
- class _NaiveAlgorithm,
- class _OptimizedAlgorithm,
- class _InIter,
- class _Sent,
- class _OutIter>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter>
-__dispatch_copy_or_move(_InIter __first, _Sent __last, _OutIter __out_first) {
- using _Algorithm = __overload<_NaiveAlgorithm, _OptimizedAlgorithm>;
- return std::__unwrap_and_dispatch<_Algorithm>(std::move(__first), std::move(__last), std::move(__out_first));
-}
-
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
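(Aside, not part of the patch: a simplified, self-contained sketch of the dispatch scheme these hunks implement, where one *_impl functor carries both the element-wise loop and the memmove fast path, and overload resolution picks the fast path once contiguous iterators have been unwrapped to pointers to a trivially copyable type. Names are illustrative, not libc++'s.)

#include <cstddef>
#include <cstring>
#include <type_traits>
#include <utility>
#include <vector>

struct copy_impl {
  // Generic element-wise loop for arbitrary iterator pairs.
  template <class In, class Out>
  std::pair<In, Out> operator()(In first, In last, Out out) const {
    for (; first != last; ++first, ++out)
      *out = *first;
    return {first, out};
  }

  // Fast path, selected when the arguments are raw pointers to a trivially
  // copyable type (the more specialized overload wins partial ordering).
  template <class T,
            std::enable_if_t<std::is_trivially_copyable_v<T>, int> = 0>
  std::pair<const T *, T *> operator()(const T *first, const T *last,
                                        T *out) const {
    const std::size_t n = static_cast<std::size_t>(last - first);
    std::memmove(out, first, n * sizeof(T));
    return {last, out + n};
  }
};

int main() {
  const std::vector<int> src{1, 2, 3, 4};
  std::vector<int> dst(4);
  // Passing raw pointers (the "unwrapped" iterators) hits the memmove path.
  copy_impl()(src.data(), src.data() + src.size(), dst.data());
}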
diff --git a/libcxx/include/__algorithm/move.h b/libcxx/include/__algorithm/move.h
index bf574b527409..1716d43e2a61 100644
--- a/libcxx/include/__algorithm/move.h
+++ b/libcxx/include/__algorithm/move.h
@@ -34,7 +34,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIte
__move(_InIter __first, _Sent __last, _OutIter __result);
template <class _AlgPolicy>
-struct __move_loop {
+struct __move_impl {
template <class _InIter, class _Sent, class _OutIter>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
operator()(_InIter __first, _Sent __last, _OutIter __result) const {
@@ -95,9 +95,7 @@ struct __move_loop {
__local_first = _Traits::__begin(++__segment_iterator);
}
}
-};
-struct __move_trivial {
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
template <class _In, class _Out, __enable_if_t<__can_lower_move_assignment_to_memmove<_In, _Out>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
@@ -109,7 +107,7 @@ struct __move_trivial {
template <class _AlgPolicy, class _InIter, class _Sent, class _OutIter>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
__move(_InIter __first, _Sent __last, _OutIter __result) {
- return std::__dispatch_copy_or_move<_AlgPolicy, __move_loop<_AlgPolicy>, __move_trivial>(
+ return std::__copy_move_unwrap_iters<__move_impl<_AlgPolicy> >(
std::move(__first), std::move(__last), std::move(__result));
}
diff --git a/libcxx/include/__algorithm/move_backward.h b/libcxx/include/__algorithm/move_backward.h
index 6bb7c91d66c7..4beb7bdbaac0 100644
--- a/libcxx/include/__algorithm/move_backward.h
+++ b/libcxx/include/__algorithm/move_backward.h
@@ -33,7 +33,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_BidirectionalIterator1
__move_backward(_BidirectionalIterator1 __first, _Sentinel __last, _BidirectionalIterator2 __result);
template <class _AlgPolicy>
-struct __move_backward_loop {
+struct __move_backward_impl {
template <class _InIter, class _Sent, class _OutIter>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
operator()(_InIter __first, _Sent __last, _OutIter __result) const {
@@ -104,9 +104,7 @@ struct __move_backward_loop {
__local_last = _Traits::__end(--__segment_iterator);
}
}
-};
-struct __move_backward_trivial {
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
template <class _In, class _Out, __enable_if_t<__can_lower_move_assignment_to_memmove<_In, _Out>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
@@ -122,7 +120,7 @@ __move_backward(_BidirectionalIterator1 __first, _Sentinel __last, _Bidirectiona
std::is_copy_constructible<_BidirectionalIterator1>::value,
"Iterators must be copy constructible.");
- return std::__dispatch_copy_or_move<_AlgPolicy, __move_backward_loop<_AlgPolicy>, __move_backward_trivial>(
+ return std::__copy_move_unwrap_iters<__move_backward_impl<_AlgPolicy> >(
std::move(__first), std::move(__last), std::move(__result));
}
diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.sat/saturate_cast.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.sat/saturate_cast.pass.cpp
index c06a9ed2d5cb..cbca37e3a661 100644
--- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.sat/saturate_cast.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.sat/saturate_cast.pass.cpp
@@ -329,7 +329,7 @@ constexpr bool test() {
{ [[maybe_unused]] std::same_as<unsigned long int> decltype(auto) _ = std::saturate_cast<unsigned long int>(sBigMax); }
assert(std::saturate_cast<unsigned long int>( sBigMin) == 0UL); // saturated
assert(std::saturate_cast<unsigned long int>( sZero) == 0UL);
- assert(std::saturate_cast<unsigned long int>( sBigMax) == ULONG_MAX); // saturated
+ assert(std::saturate_cast<unsigned long int>( sBigMax) == (sizeof(UIntT) > sizeof(unsigned long int) ? ULONG_MAX : LONG_MAX)); // saturated depending on underlying types
{ [[maybe_unused]] std::same_as<unsigned long int> decltype(auto) _ = std::saturate_cast<unsigned long int>(uBigMax); }
assert(std::saturate_cast<unsigned long int>( uZero) == 0UL);
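(Aside, not part of the patch: a minimal illustration of the std::saturate_cast behaviour the adjusted assertion depends on, assuming a C++26 standard library that ships saturation arithmetic in <numeric>.)

#include <cassert>
#include <climits>
#include <numeric>

int main() {
  // Out-of-range values clamp to the target type's limits.
  assert(std::saturate_cast<int>(LLONG_MAX) == INT_MAX);
  assert(std::saturate_cast<unsigned int>(-1LL) == 0u);
  // In-range values are passed through unchanged.
  assert(std::saturate_cast<int>(42LL) == 42);
}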
diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h
index 8f85929f1bea..917f88fc2828 100644
--- a/lld/COFF/Config.h
+++ b/lld/COFF/Config.h
@@ -54,6 +54,7 @@ enum class EmitKind { Obj, LLVM, ASM };
struct Export {
StringRef name; // N in /export:N or /export:E=N
StringRef extName; // E in /export:E=N
+ StringRef exportAs; // E in /export:N,EXPORTAS,E
StringRef aliasTarget; // GNU specific: N in "alias == N"
Symbol *sym = nullptr;
uint16_t ordinal = 0;
@@ -73,10 +74,9 @@ struct Export {
StringRef exportName; // Name in DLL
bool operator==(const Export &e) const {
- return (name == e.name && extName == e.extName &&
- aliasTarget == e.aliasTarget &&
- ordinal == e.ordinal && noname == e.noname &&
- data == e.data && isPrivate == e.isPrivate);
+ return (name == e.name && extName == e.extName && exportAs == e.exportAs &&
+ aliasTarget == e.aliasTarget && ordinal == e.ordinal &&
+ noname == e.noname && data == e.data && isPrivate == e.isPrivate);
}
};
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 181492913c0d..2b1d4abb6ed0 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -945,6 +945,7 @@ void LinkerDriver::createImportLibrary(bool asLib) {
e2.Name = std::string(e1.name);
e2.SymbolName = std::string(e1.symbolName);
e2.ExtName = std::string(e1.extName);
+ e2.ExportAs = std::string(e1.exportAs);
e2.AliasTarget = std::string(e1.aliasTarget);
e2.Ordinal = e1.ordinal;
e2.Noname = e1.noname;
@@ -1044,6 +1045,7 @@ void LinkerDriver::parseModuleDefs(StringRef path) {
e2.name = saver().save(e1.Name);
e2.extName = saver().save(e1.ExtName);
}
+ e2.exportAs = saver().save(e1.ExportAs);
e2.aliasTarget = saver().save(e1.AliasTarget);
e2.ordinal = e1.Ordinal;
e2.noname = e1.Noname;
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index 0fa4769bab19..b4ff31a606da 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -585,7 +585,8 @@ Export LinkerDriver::parseExport(StringRef arg) {
}
}
- // Optional parameters "[,@ordinal[,NONAME]][,DATA][,PRIVATE]"
+ // Optional parameters
+ // "[,@ordinal[,NONAME]][,DATA][,PRIVATE][,EXPORTAS,exportname]"
while (!rest.empty()) {
StringRef tok;
std::tie(tok, rest) = rest.split(",");
@@ -607,6 +608,13 @@ Export LinkerDriver::parseExport(StringRef arg) {
e.isPrivate = true;
continue;
}
+ if (tok.equals_insensitive("exportas")) {
+ if (!rest.empty() && !rest.contains(','))
+ e.exportAs = rest;
+ else
+ error("invalid EXPORTAS value: " + rest);
+ break;
+ }
if (tok.starts_with("@")) {
int32_t ord;
if (tok.substr(1).getAsInteger(0, ord))
@@ -683,7 +691,9 @@ void LinkerDriver::fixupExports() {
}
for (Export &e : ctx.config.exports) {
- if (!e.forwardTo.empty()) {
+ if (!e.exportAs.empty()) {
+ e.exportName = e.exportAs;
+ } else if (!e.forwardTo.empty()) {
e.exportName = undecorate(ctx, e.name);
} else {
e.exportName = undecorate(ctx, e.extName.empty() ? e.name : e.extName);
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 33c50133bec4..92f2e200db11 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1659,10 +1659,17 @@ static bool handleNonPreemptibleIfunc(Symbol &sym, uint16_t flags) {
// original section/value pairs. For non-GOT non-PLT relocation case below, we
// may alter section/value, so create a copy of the symbol to make
// section/value fixed.
+ //
+ // Prior to Android V, there was a bug that caused RELR relocations to be
+ // applied after packed relocations. This meant that resolvers referenced by
+ // IRELATIVE relocations in the packed relocation section would read
+ // unrelocated globals with RELR relocations when
+ // --pack-relative-relocs=android+relr is enabled. Work around this by placing
+ // IRELATIVE in .rela.plt.
auto *directSym = makeDefined(cast<Defined>(sym));
directSym->allocateAux();
- addPltEntry(*in.iplt, *in.igotPlt, *mainPart->relaDyn, target->iRelativeRel,
- *directSym);
+ auto &dyn = config->androidPackDynRelocs ? *in.relaPlt : *mainPart->relaDyn;
+ addPltEntry(*in.iplt, *in.igotPlt, dyn, target->iRelativeRel, *directSym);
sym.allocateAux();
symAux.back().pltIdx = symAux[directSym->auxIdx].pltIdx;
diff --git a/lld/test/COFF/exportas.test b/lld/test/COFF/exportas.test
index c0295c3d7fb7..d70547c39b40 100644
--- a/lld/test/COFF/exportas.test
+++ b/lld/test/COFF/exportas.test
@@ -9,6 +9,77 @@ RUN: lld-link -out:out1.dll -dll -noentry test.obj test.lib
RUN: llvm-readobj --coff-imports out1.dll | FileCheck --check-prefix=IMPORT %s
IMPORT: Symbol: expfunc
+Pass -export argument with EXPORTAS.
+
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows func.s -o func.obj
+RUN: lld-link -out:out2.dll -dll -noentry func.obj -export:func,EXPORTAS,expfunc
+RUN: llvm-readobj --coff-exports out2.dll | FileCheck --check-prefix=EXPORT %s
+EXPORT: Name: expfunc
+
+RUN: llvm-readobj out2.lib | FileCheck --check-prefix=IMPLIB %s
+IMPLIB: Name type: export as
+IMPLIB-NEXT: Export name: expfunc
+IMPLIB-NEXT: Symbol: __imp_func
+IMPLIB-NEXT: Symbol: func
+
+Use .drectve section with EXPORTAS.
+
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows drectve.s -o drectve.obj
+RUN: lld-link -out:out3.dll -dll -noentry func.obj drectve.obj
+RUN: llvm-readobj --coff-exports out3.dll | FileCheck --check-prefix=EXPORT %s
+RUN: llvm-readobj out3.lib | FileCheck --check-prefix=IMPLIB %s
+
+Use a .def file with EXPORTAS.
+
+RUN: lld-link -out:out4.dll -dll -noentry func.obj -def:test.def
+RUN: llvm-readobj --coff-exports out4.dll | FileCheck --check-prefix=EXPORT %s
+RUN: llvm-readobj out4.lib | FileCheck --check-prefix=IMPLIB %s
+
+Use a .def file with EXPORTAS in a forwarding export.
+
+RUN: lld-link -out:out5.dll -dll -noentry func.obj -def:test2.def
+RUN: llvm-readobj --coff-exports out5.dll | FileCheck --check-prefix=FORWARD-EXPORT %s
+FORWARD-EXPORT: Export {
+FORWARD-EXPORT-NEXT: Ordinal: 1
+FORWARD-EXPORT-NEXT: Name: expfunc
+FORWARD-EXPORT-NEXT: ForwardedTo: otherdll.otherfunc
+FORWARD-EXPORT-NEXT: }
+
+RUN: llvm-readobj out5.lib | FileCheck --check-prefix=FORWARD-IMPLIB %s
+FORWARD-IMPLIB: Name type: export as
+FORWARD-IMPLIB-NEXT: Export name: expfunc
+FORWARD-IMPLIB-NEXT: Symbol: __imp_func
+FORWARD-IMPLIB-NEXT: Symbol: func
+
+Pass -export argument with EXPORTAS in a forwarding export.
+
+RUN: lld-link -out:out6.dll -dll -noentry func.obj -export:func=otherdll.otherfunc,EXPORTAS,expfunc
+RUN: llvm-readobj --coff-exports out6.dll | FileCheck --check-prefix=FORWARD-EXPORT %s
+RUN: llvm-readobj out6.lib | FileCheck --check-prefix=FORWARD-IMPLIB %s
+
+Pass -export argument with EXPORTAS in a data export.
+
+RUN: lld-link -out:out7.dll -dll -noentry func.obj -export:func,DATA,@5,EXPORTAS,expfunc
+RUN: llvm-readobj --coff-exports out7.dll | FileCheck --check-prefix=ORD %s
+ORD: Ordinal: 5
+ORD-NEXT: Name: expfunc
+
+RUN: llvm-readobj out7.lib | FileCheck --check-prefix=ORD-IMPLIB %s
+ORD-IMPLIB: Type: data
+ORD-IMPLIB-NEXT: Name type: export as
+ORD-IMPLIB-NEXT: Export name: expfunc
+ORD-IMPLIB-NEXT: Symbol: __imp_func
+
+Check invalid EXPORTAS syntax.
+
+RUN: not lld-link -out:err1.dll -dll -noentry func.obj -export:func,EXPORTAS, 2>&1 | \
+RUN: FileCheck --check-prefix=ERR1 %s
+ERR1: error: invalid EXPORTAS value: {{$}}
+
+RUN: not lld-link -out:err2.dll -dll -noentry func.obj -export:func,EXPORTAS,expfunc,DATA 2>&1 | \
+RUN: FileCheck --check-prefix=ERR2 %s
+ERR2: error: invalid EXPORTAS value: expfunc,DATA
+
#--- test.s
.section ".test", "rd"
.rva __imp_func
@@ -17,3 +88,20 @@ IMPORT: Symbol: expfunc
LIBRARY test.dll
EXPORTS
func EXPORTAS expfunc
+
+#--- test2.def
+LIBRARY test.dll
+EXPORTS
+ func=otherdll.otherfunc EXPORTAS expfunc
+
+#--- func.s
+ .text
+ .globl func
+ .p2align 2, 0x0
+func:
+ movl $1, %eax
+ retq
+
+#--- drectve.s
+ .section .drectve, "yn"
+ .ascii " -export:func,EXPORTAS,expfunc"
diff --git a/lld/test/ELF/pack-dyn-relocs-ifunc.s b/lld/test/ELF/pack-dyn-relocs-ifunc.s
new file mode 100644
index 000000000000..6168d06f99d9
--- /dev/null
+++ b/lld/test/ELF/pack-dyn-relocs-ifunc.s
@@ -0,0 +1,49 @@
+# REQUIRES: aarch64
+## Prior to Android V, there was a bug that caused RELR relocations to be
+## applied after packed relocations. This meant that resolvers referenced by
+## IRELATIVE relocations in the packed relocation section would read unrelocated
+## globals when --pack-relative-relocs=android+relr is enabled. Work around this
+## by placing IRELATIVE in .rela.plt.
+
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-android a.s -o a.o
+# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-android b.s -o b.o
+# RUN: ld.lld -shared b.o -o b.so
+# RUN: ld.lld -pie --pack-dyn-relocs=android+relr -z separate-loadable-segments a.o b.so -o a
+# RUN: llvm-readobj -r a | FileCheck %s
+# RUN: llvm-objdump -d a | FileCheck %s --check-prefix=ASM
+
+# CHECK: .relr.dyn {
+# CHECK-NEXT: 0x30000 R_AARCH64_RELATIVE -
+# CHECK-NEXT: }
+# CHECK: .rela.plt {
+# CHECK-NEXT: 0x30020 R_AARCH64_JUMP_SLOT bar 0x0
+# CHECK-NEXT: 0x30028 R_AARCH64_IRELATIVE - 0x10000
+# CHECK-NEXT: }
+
+# ASM: <.iplt>:
+# ASM-NEXT: adrp x16, 0x30000
+# ASM-NEXT: ldr x17, [x16, #0x28]
+# ASM-NEXT: add x16, x16, #0x28
+# ASM-NEXT: br x17
+
+#--- a.s
+.text
+.type foo, %gnu_indirect_function
+.globl foo
+foo:
+ ret
+
+.globl _start
+_start:
+ bl foo
+ bl bar
+
+.data
+.balign 8
+.quad .data
+
+#--- b.s
+.globl bar
+bar:
+ ret
diff --git a/lldb/include/lldb/Symbol/UnwindTable.h b/lldb/include/lldb/Symbol/UnwindTable.h
index f0ce7047de2d..26826e5d1b49 100644
--- a/lldb/include/lldb/Symbol/UnwindTable.h
+++ b/lldb/include/lldb/Symbol/UnwindTable.h
@@ -57,6 +57,10 @@ public:
ArchSpec GetArchitecture();
+ /// Called after a SymbolFile has been added to a Module to add any new
+ /// unwind sections that may now be available.
+ void Update();
+
private:
void Dump(Stream &s);
diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp
index a520523a9652..9c105b3f0e57 100644
--- a/lldb/source/Core/Module.cpp
+++ b/lldb/source/Core/Module.cpp
@@ -1009,6 +1009,8 @@ SymbolFile *Module::GetSymbolFile(bool can_create, Stream *feedback_strm) {
m_symfile_up.reset(
SymbolVendor::FindPlugin(shared_from_this(), feedback_strm));
m_did_load_symfile = true;
+ if (m_unwind_table)
+ m_unwind_table->Update();
}
}
}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index 5f67658f86ea..1164bc62682a 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -693,6 +693,7 @@ llvm::DWARFDebugAbbrev *SymbolFileDWARF::DebugAbbrev() {
if (debug_abbrev_data.GetByteSize() == 0)
return nullptr;
+ ElapsedTime elapsed(m_parse_time);
auto abbr =
std::make_unique<llvm::DWARFDebugAbbrev>(debug_abbrev_data.GetAsLLVM());
llvm::Error error = abbr->parse();
diff --git a/lldb/source/Symbol/UnwindTable.cpp b/lldb/source/Symbol/UnwindTable.cpp
index 3c1a5187b110..11bedf3d6052 100644
--- a/lldb/source/Symbol/UnwindTable.cpp
+++ b/lldb/source/Symbol/UnwindTable.cpp
@@ -84,6 +84,51 @@ void UnwindTable::Initialize() {
}
}
+void UnwindTable::Update() {
+ if (!m_initialized)
+ return Initialize();
+
+ std::lock_guard<std::mutex> guard(m_mutex);
+
+ ObjectFile *object_file = m_module.GetObjectFile();
+ if (!object_file)
+ return;
+
+ if (!m_object_file_unwind_up)
+ m_object_file_unwind_up = object_file->CreateCallFrameInfo();
+
+ SectionList *sl = m_module.GetSectionList();
+ if (!sl)
+ return;
+
+ SectionSP sect = sl->FindSectionByType(eSectionTypeEHFrame, true);
+ if (!m_eh_frame_up && sect) {
+ m_eh_frame_up = std::make_unique<DWARFCallFrameInfo>(
+ *object_file, sect, DWARFCallFrameInfo::EH);
+ }
+
+ sect = sl->FindSectionByType(eSectionTypeDWARFDebugFrame, true);
+ if (!m_debug_frame_up && sect) {
+ m_debug_frame_up = std::make_unique<DWARFCallFrameInfo>(
+ *object_file, sect, DWARFCallFrameInfo::DWARF);
+ }
+
+ sect = sl->FindSectionByType(eSectionTypeCompactUnwind, true);
+ if (!m_compact_unwind_up && sect) {
+ m_compact_unwind_up =
+ std::make_unique<CompactUnwindInfo>(*object_file, sect);
+ }
+
+ sect = sl->FindSectionByType(eSectionTypeARMexidx, true);
+ if (!m_arm_unwind_up && sect) {
+ SectionSP sect_extab = sl->FindSectionByType(eSectionTypeARMextab, true);
+ if (sect_extab.get()) {
+ m_arm_unwind_up =
+ std::make_unique<ArmUnwindInfo>(*object_file, sect, sect_extab);
+ }
+ }
+}
+
UnwindTable::~UnwindTable() = default;
std::optional<AddressRange>
diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp
index 3af62f52d575..03a74f29e76e 100644
--- a/lldb/source/Target/StackFrame.cpp
+++ b/lldb/source/Target/StackFrame.cpp
@@ -1800,7 +1800,6 @@ void StackFrame::DumpUsingSettingsFormat(Stream *strm, bool show_unique,
return;
ExecutionContext exe_ctx(shared_from_this());
- StreamString s;
const FormatEntity::Entry *frame_format = nullptr;
Target *target = exe_ctx.GetTargetPtr();
diff --git a/lldb/test/Shell/SymbolFile/target-symbols-add-unwind.test b/lldb/test/Shell/SymbolFile/target-symbols-add-unwind.test
new file mode 100644
index 000000000000..5420213d405e
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/target-symbols-add-unwind.test
@@ -0,0 +1,27 @@
+# TODO: When it's possible to run "image show-unwind" without a running
+# process, we can remove the unsupported line below, and hard-code an ELF
+# triple in the test.
+# UNSUPPORTED: system-windows, system-darwin
+
+# RUN: cd %T
+# RUN: %clang_host %S/Inputs/target-symbols-add-unwind.c -g \
+# RUN: -fno-unwind-tables -fno-asynchronous-unwind-tables \
+# RUN: -o target-symbols-add-unwind.debug
+# RUN: llvm-objcopy --strip-debug target-symbols-add-unwind.debug \
+# RUN: target-symbols-add-unwind.stripped
+# RUN: %lldb target-symbols-add-unwind.stripped -s %s -o quit | FileCheck %s
+
+process launch --stop-at-entry
+image show-unwind -n main
+# CHECK-LABEL: image show-unwind -n main
+# CHECK-NOT: debug_frame UnwindPlan:
+
+target symbols add -s target-symbols-add-unwind.stripped target-symbols-add-unwind.debug
+# CHECK-LABEL: target symbols add
+# CHECK: symbol file {{.*}} has been added to {{.*}}
+
+image show-unwind -n main
+# CHECK-LABEL: image show-unwind -n main
+# CHECK: debug_frame UnwindPlan:
+# CHECK-NEXT: This UnwindPlan originally sourced from DWARF CFI
+# CHECK-NEXT: This UnwindPlan is sourced from the compiler: yes.
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 030b76935c7b..a4be31576931 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -12815,10 +12815,11 @@ Variable argument support is defined in LLVM with the
functions. These functions are related to the similarly named macros
defined in the ``<stdarg.h>`` header file.
-All of these functions operate on arguments that use a target-specific
+All of these functions take as arguments pointers to a target-specific
value type "``va_list``". The LLVM assembly language reference manual
does not define what this type is, so all transformations should be
-prepared to handle these functions regardless of the type used.
+prepared to handle these functions regardless of the type used. The intrinsics
+are overloaded, and can be used for pointers to different address spaces.
This example shows how the :ref:`va_arg <i_va_arg>` instruction and the
variable argument handling intrinsic functions are used.
@@ -12835,24 +12836,24 @@ variable argument handling intrinsic functions are used.
define i32 @test(i32 %X, ...) {
; Initialize variable argument processing
%ap = alloca %struct.va_list
- call void @llvm.va_start(ptr %ap)
+ call void @llvm.va_start.p0(ptr %ap)
; Read a single integer argument
%tmp = va_arg ptr %ap, i32
; Demonstrate usage of llvm.va_copy and llvm.va_end
%aq = alloca ptr
- call void @llvm.va_copy(ptr %aq, ptr %ap)
- call void @llvm.va_end(ptr %aq)
+ call void @llvm.va_copy.p0(ptr %aq, ptr %ap)
+ call void @llvm.va_end.p0(ptr %aq)
; Stop processing of arguments.
- call void @llvm.va_end(ptr %ap)
+ call void @llvm.va_end.p0(ptr %ap)
ret i32 %tmp
}
- declare void @llvm.va_start(ptr)
- declare void @llvm.va_copy(ptr, ptr)
- declare void @llvm.va_end(ptr)
+ declare void @llvm.va_start.p0(ptr)
+ declare void @llvm.va_copy.p0(ptr, ptr)
+ declare void @llvm.va_end.p0(ptr)
.. _int_va_start:
@@ -12864,7 +12865,8 @@ Syntax:
::
- declare void @llvm.va_start(ptr <arglist>)
+ declare void @llvm.va_start.p0(ptr <arglist>)
+ declare void @llvm.va_start.p5(ptr addrspace(5) <arglist>)
Overview:
"""""""""
@@ -12896,7 +12898,8 @@ Syntax:
::
- declare void @llvm.va_end(ptr <arglist>)
+ declare void @llvm.va_end.p0(ptr <arglist>)
+ declare void @llvm.va_end.p5(ptr addrspace(5) <arglist>)
Overview:
"""""""""
@@ -12929,7 +12932,8 @@ Syntax:
::
- declare void @llvm.va_copy(ptr <destarglist>, ptr <srcarglist>)
+ declare void @llvm.va_copy.p0(ptr <destarglist>, ptr <srcarglist>)
+ declare void @llvm.va_copy.p5(ptr addrspace(5) <destarglist>, ptr addrspace(5) <srcarglist>)
Overview:
"""""""""
@@ -12942,6 +12946,7 @@ Arguments:
The first argument is a pointer to a ``va_list`` element to initialize.
The second argument is a pointer to a ``va_list`` element to copy from.
+The address spaces of the two arguments must match.
Semantics:
""""""""""
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index c2b1a9d3d738..7588048334d7 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -76,6 +76,7 @@ Changes to the AMDGPU Backend
Changes to the ARM Backend
--------------------------
+* FEAT_F32MM is no longer activated by default when using `+sve` on v8.6-A or greater. The feature is still available and can be used by adding `+f32mm` to the command line options.
Changes to the AVR Backend
--------------------------
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index 5863a8d6e8ee..65ccb1b81b3a 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -181,6 +181,7 @@ enum Kind {
kw_tailcc,
kw_m68k_rtdcc,
kw_graalcc,
+ kw_riscv_vector_cc,
// Attributes:
kw_attributes,
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def
index e70b58d5ea50..d8927c6202fd 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -1040,6 +1040,7 @@ HANDLE_DW_CC(0xca, LLVM_PreserveAll)
HANDLE_DW_CC(0xcb, LLVM_X86RegCall)
HANDLE_DW_CC(0xcc, LLVM_M68kRTD)
HANDLE_DW_CC(0xcd, LLVM_PreserveNone)
+HANDLE_DW_CC(0xce, LLVM_RISCVVectorCall)
// From GCC source code (include/dwarf2.h): This DW_CC_ value is not currently
// generated by any toolchain. It is used internally to GDB to indicate OpenCL
// C functions that have been compiled with the IBM XL C for OpenCL compiler and
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
index ef8aaf52f4e6..a05d1a4d5878 100644
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -264,6 +264,9 @@ namespace CallingConv {
/// except that the first parameter is mapped to x9.
ARM64EC_Thunk_Native = 109,
+ /// Calling convention used for RISC-V V-extension.
+ RISCV_VectorCall = 110,
+
/// The highest possible ID. Must be some 2^k - 1.
MaxID = 1023
};
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index d0ef9c25f39e..764902426f0b 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -700,10 +700,13 @@ class MSBuiltin<string name> {
//===--------------- Variable Argument Handling Intrinsics ----------------===//
//
-def int_vastart : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], [], "llvm.va_start">;
-def int_vacopy : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [],
- "llvm.va_copy">;
-def int_vaend : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], [], "llvm.va_end">;
+def int_vastart : DefaultAttrsIntrinsic<[],
+ [llvm_anyptr_ty], [], "llvm.va_start">;
+def int_vacopy : DefaultAttrsIntrinsic<[],
+ [llvm_anyptr_ty, LLVMMatchType<0>], [],
+ "llvm.va_copy">;
+def int_vaend : DefaultAttrsIntrinsic<[],
+ [llvm_anyptr_ty], [], "llvm.va_end">;
//===------------------- Garbage Collection Intrinsics --------------------===//
//
diff --git a/llvm/include/llvm/IR/Verifier.h b/llvm/include/llvm/IR/Verifier.h
index b25f8eb77ee3..b7db6e0bbfb7 100644
--- a/llvm/include/llvm/IR/Verifier.h
+++ b/llvm/include/llvm/IR/Verifier.h
@@ -77,6 +77,7 @@ public:
/// Visit an instruction and return true if it is valid, return false if an
/// invalid TBAA is attached.
bool visitTBAAMetadata(Instruction &I, const MDNode *MD);
+ bool visitTBAAStructMetadata(Instruction &I, const MDNode *MD);
};
/// Check a function for errors, useful for use when debugging a
diff --git a/llvm/include/llvm/Object/GOFF.h b/llvm/include/llvm/Object/GOFF.h
index 91762457ae05..9fb8876e893d 100644
--- a/llvm/include/llvm/Object/GOFF.h
+++ b/llvm/include/llvm/Object/GOFF.h
@@ -73,6 +73,26 @@ protected:
}
};
+class TXTRecord : public Record {
+public:
+ /// \brief Maximum length of data; any more must go in continuation.
+ static const uint8_t TXTMaxDataLength = 56;
+
+ static Error getData(const uint8_t *Record, SmallString<256> &CompleteData);
+
+ static void getElementEsdId(const uint8_t *Record, uint32_t &EsdId) {
+ get<uint32_t>(Record, 4, EsdId);
+ }
+
+ static void getOffset(const uint8_t *Record, uint32_t &Offset) {
+ get<uint32_t>(Record, 12, Offset);
+ }
+
+ static void getDataLength(const uint8_t *Record, uint16_t &Length) {
+ get<uint16_t>(Record, 22, Length);
+ }
+};
+
class HDRRecord : public Record {
public:
static Error getData(const uint8_t *Record, SmallString<256> &CompleteData);
diff --git a/llvm/include/llvm/Object/GOFFObjectFile.h b/llvm/include/llvm/Object/GOFFObjectFile.h
index 7e1ceb95f667..6871641e97ec 100644
--- a/llvm/include/llvm/Object/GOFFObjectFile.h
+++ b/llvm/include/llvm/Object/GOFFObjectFile.h
@@ -29,7 +29,10 @@ namespace llvm {
namespace object {
class GOFFObjectFile : public ObjectFile {
+ friend class GOFFSymbolRef;
+
IndexedMap<const uint8_t *> EsdPtrs; // Indexed by EsdId.
+ SmallVector<const uint8_t *, 256> TextPtrs;
mutable DenseMap<uint32_t, std::pair<size_t, std::unique_ptr<char[]>>>
EsdNamesCache;
@@ -38,7 +41,7 @@ class GOFFObjectFile : public ObjectFile {
// (EDID, 0) code, r/o data section
// (EDID,PRID) r/w data section
SmallVector<SectionEntryImpl, 256> SectionList;
- mutable DenseMap<uint32_t, std::string> SectionDataCache;
+ mutable DenseMap<uint32_t, SmallVector<uint8_t>> SectionDataCache;
public:
Expected<StringRef> getSymbolName(SymbolRef Symbol) const;
@@ -66,6 +69,10 @@ public:
return true;
}
+ bool isSectionNoLoad(DataRefImpl Sec) const;
+ bool isSectionReadOnlyData(DataRefImpl Sec) const;
+ bool isSectionZeroInit(DataRefImpl Sec) const;
+
private:
// SymbolRef.
Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
@@ -75,27 +82,24 @@ private:
Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override;
Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
+ uint64_t getSymbolSize(DataRefImpl Symb) const;
const uint8_t *getSymbolEsdRecord(DataRefImpl Symb) const;
bool isSymbolUnresolved(DataRefImpl Symb) const;
bool isSymbolIndirect(DataRefImpl Symb) const;
// SectionRef.
- void moveSectionNext(DataRefImpl &Sec) const override {}
- virtual Expected<StringRef> getSectionName(DataRefImpl Sec) const override {
- return StringRef();
- }
- uint64_t getSectionAddress(DataRefImpl Sec) const override { return 0; }
- uint64_t getSectionSize(DataRefImpl Sec) const override { return 0; }
+ void moveSectionNext(DataRefImpl &Sec) const override;
+ virtual Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
+ uint64_t getSectionAddress(DataRefImpl Sec) const override;
+ uint64_t getSectionSize(DataRefImpl Sec) const override;
virtual Expected<ArrayRef<uint8_t>>
- getSectionContents(DataRefImpl Sec) const override {
- return ArrayRef<uint8_t>();
- }
- uint64_t getSectionIndex(DataRefImpl Sec) const override { return 0; }
- uint64_t getSectionAlignment(DataRefImpl Sec) const override { return 0; }
+ getSectionContents(DataRefImpl Sec) const override;
+ uint64_t getSectionIndex(DataRefImpl Sec) const override { return Sec.d.a; }
+ uint64_t getSectionAlignment(DataRefImpl Sec) const override;
bool isSectionCompressed(DataRefImpl Sec) const override { return false; }
- bool isSectionText(DataRefImpl Sec) const override { return false; }
- bool isSectionData(DataRefImpl Sec) const override { return false; }
+ bool isSectionText(DataRefImpl Sec) const override;
+ bool isSectionData(DataRefImpl Sec) const override;
bool isSectionBSS(DataRefImpl Sec) const override { return false; }
bool isSectionVirtual(DataRefImpl Sec) const override { return false; }
relocation_iterator section_rel_begin(DataRefImpl Sec) const override {
@@ -109,6 +113,7 @@ private:
const uint8_t *getSectionPrEsdRecord(DataRefImpl &Sec) const;
const uint8_t *getSectionEdEsdRecord(uint32_t SectionIndex) const;
const uint8_t *getSectionPrEsdRecord(uint32_t SectionIndex) const;
+ uint32_t getSectionDefEsdId(DataRefImpl &Sec) const;
// RelocationRef.
void moveRelocationNext(DataRefImpl &Rel) const override {}
@@ -122,6 +127,29 @@ private:
SmallVectorImpl<char> &Result) const override {}
};
+class GOFFSymbolRef : public SymbolRef {
+public:
+ GOFFSymbolRef(const SymbolRef &B) : SymbolRef(B) {
+ assert(isa<GOFFObjectFile>(SymbolRef::getObject()));
+ }
+
+ const GOFFObjectFile *getObject() const {
+ return cast<GOFFObjectFile>(BasicSymbolRef::getObject());
+ }
+
+ Expected<uint32_t> getSymbolGOFFFlags() const {
+ return getObject()->getSymbolFlags(getRawDataRefImpl());
+ }
+
+ Expected<SymbolRef::Type> getSymbolGOFFType() const {
+ return getObject()->getSymbolType(getRawDataRefImpl());
+ }
+
+ uint64_t getSize() const {
+ return getObject()->getSymbolSize(getRawDataRefImpl());
+ }
+};
+
} // namespace object
} // namespace llvm
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 02f64fcfac4f..2301a27731ea 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -640,6 +640,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(tailcc);
KEYWORD(m68k_rtdcc);
KEYWORD(graalcc);
+ KEYWORD(riscv_vector_cc);
KEYWORD(cc);
KEYWORD(c);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index f0be021668af..41d48e522620 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -2143,6 +2143,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'tailcc'
/// ::= 'm68k_rtdcc'
/// ::= 'graalcc'
+/// ::= 'riscv_vector_cc'
/// ::= 'cc' UINT
///
bool LLParser::parseOptionalCallingConv(unsigned &CC) {
@@ -2213,6 +2214,9 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
case lltok::kw_tailcc: CC = CallingConv::Tail; break;
case lltok::kw_m68k_rtdcc: CC = CallingConv::M68k_RTD; break;
case lltok::kw_graalcc: CC = CallingConv::GRAAL; break;
+ case lltok::kw_riscv_vector_cc:
+ CC = CallingConv::RISCV_VectorCall;
+ break;
case lltok::kw_cc: {
Lex.Lex();
return parseUInt32(CC);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 39ee95d7007c..36abe27d2621 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2555,7 +2555,8 @@ SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
/// a shift and add with a different constant.
-static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
+static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
+ SelectionDAG &DAG) {
assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
"Expecting add or sub");
@@ -2583,7 +2584,6 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
// Eliminate the 'not' by adjusting the shift and add/sub constant:
// add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
// sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
- SDLoc DL(N);
if (SDValue NewC = DAG.FoldConstantArithmetic(
IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
{ConstantOp, DAG.getConstant(1, DL, VT)})) {
@@ -2878,7 +2878,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
return V;
- if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
return V;
// Try to match AVGFLOOR fixedwidth pattern
@@ -3877,14 +3877,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
return V;
- if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
return V;
// Try to match AVGCEIL fixedwidth pattern
if (SDValue V = foldSubToAvg(N, DL))
return V;
- if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
+ if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
return V;
if (SDValue V = foldSubToUSubSat(VT, N, DL))
@@ -3949,7 +3949,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
if (C->getAPIntValue() == (BitWidth - 1))
- return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+ return DAG.getNode(ISD::ABS, DL, VT, S0);
}
}
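(Aside, not part of the patch: a standalone arithmetic check of the identity behind the fold threaded through these hunks, add (srl (not X), 31), C --> add (sra X, 31), (C + 1), written with plain 32-bit integers rather than SelectionDAG nodes; it relies on arithmetic right shift of signed values, which C++20 guarantees.)

#include <cassert>
#include <cstdint>

int main() {
  const int32_t xs[] = {0, 1, -1, 42, -42, INT32_MAX, INT32_MIN};
  const uint32_t c = 7;
  for (int32_t x : xs) {
    uint32_t lhs = (static_cast<uint32_t>(~x) >> 31) + c;    // srl (not X), 31
    uint32_t rhs = static_cast<uint32_t>(x >> 31) + (c + 1); // sra X, 31
    assert(lhs == rhs);
  }
}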
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b1f6fd6f3c72..e10b8bc8c5e2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -118,14 +118,7 @@ private:
void LegalizeLoadOps(SDNode *Node);
void LegalizeStoreOps(SDNode *Node);
- /// Some targets cannot handle a variable
- /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
- /// is necessary to spill the vector being inserted into to memory, perform
- /// the insert there, and then read the result back.
- SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
- const SDLoc &dl);
- SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx,
- const SDLoc &dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Op);
/// Return a vector shuffle operation which
/// performs the same shuffe in terms of order or result bytes, but on a type
@@ -378,45 +371,12 @@ SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
return Result;
}
-/// Some target cannot handle a variable insertion index for the
-/// INSERT_VECTOR_ELT instruction. In this case, it
-/// is necessary to spill the vector being inserted into to memory, perform
-/// the insert there, and then read the result back.
-SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
- SDValue Val,
- SDValue Idx,
- const SDLoc &dl) {
- // If the target doesn't support this, we have to spill the input vector
- // to a temporary stack slot, update the element, then reload it. This is
- // badness. We could also load the value into a vector register (either
- // with a "move to register" or "extload into register" instruction, then
- // permute it into place, if the idx is a constant and if the idx is
- // supported by the target.
- EVT VT = Vec.getValueType();
- EVT EltVT = VT.getVectorElementType();
- SDValue StackPtr = DAG.CreateStackTemporary(VT);
-
- int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
-
- // Store the vector.
- SDValue Ch = DAG.getStore(
- DAG.getEntryNode(), dl, Vec, StackPtr,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
-
- SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Idx);
-
- // Store the scalar value.
- Ch = DAG.getTruncStore(
- Ch, dl, Val, StackPtr2,
- MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
- // Load the updated vector.
- return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), SPFI));
-}
+SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Op) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ SDLoc dl(Op);
-SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
- SDValue Idx,
- const SDLoc &dl) {
if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
// SCALAR_TO_VECTOR requires that the type of the value being inserted
// match the element type of the vector being created, except for
@@ -438,7 +398,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, ShufOps);
}
}
- return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+ return ExpandInsertToVectorThroughStack(Op);
}
SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
@@ -1486,7 +1446,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
// Store the value to a temporary stack slot, then LOAD the returned part.
EVT VecVT = Vec.getValueType();
- EVT SubVecVT = Part.getValueType();
+ EVT PartVT = Part.getValueType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo PtrInfo =
@@ -1496,13 +1456,24 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
// Then store the inserted part.
- SDValue SubStackPtr =
- TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx);
+ if (PartVT.isVector()) {
+ SDValue SubStackPtr =
+ TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, PartVT, Idx);
+
+ // Store the subvector.
+ Ch = DAG.getStore(
+ Ch, dl, Part, SubStackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ } else {
+ SDValue SubStackPtr =
+ TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
- // Store the subvector.
- Ch = DAG.getStore(
- Ch, dl, Part, SubStackPtr,
- MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ // Store the scalar value.
+ Ch = DAG.getTruncStore(
+ Ch, dl, Part, SubStackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
+ VecVT.getVectorElementType());
+ }
// Finally, load the updated vector.
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
@@ -3416,9 +3387,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
break;
case ISD::INSERT_VECTOR_ELT:
- Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
- Node->getOperand(1),
- Node->getOperand(2), dl));
+ Results.push_back(ExpandINSERT_VECTOR_ELT(SDValue(Node, 0)));
break;
case ISD::VECTOR_SHUFFLE: {
SmallVector<int, 32> NewMask;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 9990556f89ed..b16e78daf586 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -2073,7 +2073,8 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
// FreeBSD has "__stack_chk_guard" defined externally on libc.so
if (M.getDirectAccessExternalData() &&
!TM.getTargetTriple().isWindowsGNUEnvironment() &&
- !TM.getTargetTriple().isOSFreeBSD() &&
+ !(TM.getTargetTriple().isPPC64() &&
+ TM.getTargetTriple().isOSFreeBSD()) &&
(!TM.getTargetTriple().isOSDarwin() ||
TM.getRelocationModel() == Reloc::Static))
GV->setDSOLocal(true);
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 38c191a2dec6..84690f026139 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -363,6 +363,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break;
case CallingConv::M68k_RTD: Out << "m68k_rtdcc"; break;
+ case CallingConv::RISCV_VectorCall:
+ Out << "riscv_vector_cc";
+ break;
}
}
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 33f358440a31..e16572540f96 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5096,6 +5096,9 @@ void Verifier::visitInstruction(Instruction &I) {
if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa))
TBAAVerifyHelper.visitTBAAMetadata(I, TBAA);
+ if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa_struct))
+ TBAAVerifyHelper.visitTBAAStructMetadata(I, TBAA);
+
if (MDNode *MD = I.getMetadata(LLVMContext::MD_noalias))
visitAliasScopeListMetadata(MD);
if (MDNode *MD = I.getMetadata(LLVMContext::MD_alias_scope))
@@ -7419,6 +7422,35 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
return true;
}
+bool TBAAVerifier::visitTBAAStructMetadata(Instruction &I, const MDNode *MD) {
+ CheckTBAA(MD->getNumOperands() % 3 == 0,
+ "tbaa.struct operands must occur in groups of three", &I, MD);
+
+ // Each group of three operands must consist of two integers and a
+ // tbaa node. Moreover, the regions described by the offset and size
+ // operands must be non-overlapping.
+ std::optional<APInt> NextFree;
+ for (unsigned int Idx = 0; Idx < MD->getNumOperands(); Idx += 3) {
+ auto *OffsetCI =
+ mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(Idx));
+ CheckTBAA(OffsetCI, "Offset must be a constant integer", &I, MD);
+
+ auto *SizeCI =
+ mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(Idx + 1));
+ CheckTBAA(SizeCI, "Size must be a constant integer", &I, MD);
+
+ MDNode *TBAA = dyn_cast_or_null<MDNode>(MD->getOperand(Idx + 2));
+ CheckTBAA(TBAA, "TBAA tag missing", &I, MD);
+ visitTBAAMetadata(I, TBAA);
+
+ bool NonOverlapping = !NextFree || NextFree->ule(OffsetCI->getValue());
+ CheckTBAA(NonOverlapping, "Overlapping tbaa.struct regions", &I, MD);
+
+ NextFree = OffsetCI->getValue() + SizeCI->getValue();
+ }
+ return true;
+}
+
char VerifierLegacyPass::ID = 0;
INITIALIZE_PASS(VerifierLegacyPass, "verify", "Module Verifier", false, false)
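(Aside, not part of the patch: a sketch of constructing a tbaa.struct node that satisfies the new checks, i.e. operands in (offset, size, access-tag) triples describing non-overlapping regions. The MDBuilder helpers used here are assumed from llvm/IR/MDBuilder.h; the names and offsets are illustrative.)

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"

llvm::MDNode *buildExampleTBAAStruct(llvm::LLVMContext &Ctx) {
  llvm::MDBuilder MDB(Ctx);
  llvm::MDNode *Root = MDB.createTBAARoot("example TBAA root");
  llvm::MDNode *IntTy = MDB.createTBAAScalarTypeNode("int", Root);
  llvm::MDNode *IntTag =
      MDB.createTBAAStructTagNode(IntTy, IntTy, /*Offset=*/0);

  // Two 4-byte fields at offsets 0 and 8: offsets/sizes are constant integers
  // and the regions do not overlap, so the verifier accepts the node.
  llvm::MDBuilder::TBAAStructField Fields[] = {
      {/*Offset=*/0, /*Size=*/4, IntTag},
      {/*Offset=*/8, /*Size=*/4, IntTag},
  };
  return MDB.createTBAAStructNode(Fields);
}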
diff --git a/llvm/lib/Object/COFFImportFile.cpp b/llvm/lib/Object/COFFImportFile.cpp
index 8224a1492502..477c5bf98249 100644
--- a/llvm/lib/Object/COFFImportFile.cpp
+++ b/llvm/lib/Object/COFFImportFile.cpp
@@ -690,12 +690,12 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path,
if (ImportType == IMPORT_CODE && isArm64EC(M)) {
if (std::optional<std::string> MangledName =
getArm64ECMangledFunctionName(Name)) {
- if (ExportName.empty()) {
+ if (!E.Noname && ExportName.empty()) {
NameType = IMPORT_NAME_EXPORTAS;
ExportName.swap(Name);
}
Name = std::move(*MangledName);
- } else if (ExportName.empty()) {
+ } else if (!E.Noname && ExportName.empty()) {
NameType = IMPORT_NAME_EXPORTAS;
ExportName = std::move(*getArm64ECDemangledFunctionName(Name));
}
diff --git a/llvm/lib/Object/GOFFObjectFile.cpp b/llvm/lib/Object/GOFFObjectFile.cpp
index 76a13559ebfe..2845d9362544 100644
--- a/llvm/lib/Object/GOFFObjectFile.cpp
+++ b/llvm/lib/Object/GOFFObjectFile.cpp
@@ -168,6 +168,11 @@ GOFFObjectFile::GOFFObjectFile(MemoryBufferRef Object, Error &Err)
LLVM_DEBUG(dbgs() << " -- ESD " << EsdId << "\n");
break;
}
+ case GOFF::RT_TXT:
+ // Save TXT records.
+ TextPtrs.emplace_back(I);
+ LLVM_DEBUG(dbgs() << " -- TXT\n");
+ break;
case GOFF::RT_END:
LLVM_DEBUG(dbgs() << " -- END (GOFF record type) unhandled\n");
break;
@@ -364,6 +369,13 @@ GOFFObjectFile::getSymbolSection(DataRefImpl Symb) const {
std::to_string(SymEdId));
}
+uint64_t GOFFObjectFile::getSymbolSize(DataRefImpl Symb) const {
+ const uint8_t *Record = getSymbolEsdRecord(Symb);
+ uint32_t Length;
+ ESDRecord::getLength(Record, Length);
+ return Length;
+}
+
const uint8_t *GOFFObjectFile::getSectionEdEsdRecord(DataRefImpl &Sec) const {
SectionEntryImpl EsdIds = SectionList[Sec.d.a];
const uint8_t *EsdRecord = EsdPtrs[EsdIds.d.a];
@@ -394,6 +406,154 @@ GOFFObjectFile::getSectionPrEsdRecord(uint32_t SectionIndex) const {
return EsdRecord;
}
+uint32_t GOFFObjectFile::getSectionDefEsdId(DataRefImpl &Sec) const {
+ const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec);
+ uint32_t Length;
+ ESDRecord::getLength(EsdRecord, Length);
+ if (Length == 0) {
+ const uint8_t *PrEsdRecord = getSectionPrEsdRecord(Sec);
+ if (PrEsdRecord)
+ EsdRecord = PrEsdRecord;
+ }
+
+ uint32_t DefEsdId;
+ ESDRecord::getEsdId(EsdRecord, DefEsdId);
+ LLVM_DEBUG(dbgs() << "Got def EsdId: " << DefEsdId << '\n');
+ return DefEsdId;
+}
+
+void GOFFObjectFile::moveSectionNext(DataRefImpl &Sec) const {
+ Sec.d.a++;
+ if ((Sec.d.a) >= SectionList.size())
+ Sec.d.a = 0;
+}
+
+Expected<StringRef> GOFFObjectFile::getSectionName(DataRefImpl Sec) const {
+ DataRefImpl EdSym;
+ SectionEntryImpl EsdIds = SectionList[Sec.d.a];
+ EdSym.d.a = EsdIds.d.a;
+ Expected<StringRef> Name = getSymbolName(EdSym);
+ if (Name) {
+ StringRef Res = *Name;
+ LLVM_DEBUG(dbgs() << "Got section: " << Res << '\n');
+ LLVM_DEBUG(dbgs() << "Final section name: " << Res << '\n');
+ Name = Res;
+ }
+ return Name;
+}
+
+uint64_t GOFFObjectFile::getSectionAddress(DataRefImpl Sec) const {
+ uint32_t Offset;
+ const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec);
+ ESDRecord::getOffset(EsdRecord, Offset);
+ return Offset;
+}
+
+uint64_t GOFFObjectFile::getSectionSize(DataRefImpl Sec) const {
+ uint32_t Length;
+ uint32_t DefEsdId = getSectionDefEsdId(Sec);
+ const uint8_t *EsdRecord = EsdPtrs[DefEsdId];
+ ESDRecord::getLength(EsdRecord, Length);
+ LLVM_DEBUG(dbgs() << "Got section size: " << Length << '\n');
+ return static_cast<uint64_t>(Length);
+}
+
+// Unravel TXT records and expand fill characters to produce
+// a contiguous sequence of bytes.
+Expected<ArrayRef<uint8_t>>
+GOFFObjectFile::getSectionContents(DataRefImpl Sec) const {
+ if (SectionDataCache.count(Sec.d.a)) {
+ auto &Buf = SectionDataCache[Sec.d.a];
+ return ArrayRef<uint8_t>(Buf);
+ }
+ uint64_t SectionSize = getSectionSize(Sec);
+ uint32_t DefEsdId = getSectionDefEsdId(Sec);
+
+ const uint8_t *EdEsdRecord = getSectionEdEsdRecord(Sec);
+ bool FillBytePresent;
+ ESDRecord::getFillBytePresent(EdEsdRecord, FillBytePresent);
+ uint8_t FillByte = '\0';
+ if (FillBytePresent)
+ ESDRecord::getFillByteValue(EdEsdRecord, FillByte);
+
+ // Initialize section with fill byte.
+ SmallVector<uint8_t> Data(SectionSize, FillByte);
+
+ // Replace section with content from text records.
+ for (const uint8_t *TxtRecordInt : TextPtrs) {
+ const uint8_t *TxtRecordPtr = TxtRecordInt;
+ uint32_t TxtEsdId;
+ TXTRecord::getElementEsdId(TxtRecordPtr, TxtEsdId);
+ LLVM_DEBUG(dbgs() << "Got txt EsdId: " << TxtEsdId << '\n');
+
+ if (TxtEsdId != DefEsdId)
+ continue;
+
+ uint32_t TxtDataOffset;
+ TXTRecord::getOffset(TxtRecordPtr, TxtDataOffset);
+
+ uint16_t TxtDataSize;
+ TXTRecord::getDataLength(TxtRecordPtr, TxtDataSize);
+
+ LLVM_DEBUG(dbgs() << "Record offset " << TxtDataOffset << ", data size "
+ << TxtDataSize << "\n");
+
+ SmallString<256> CompleteData;
+ CompleteData.reserve(TxtDataSize);
+ if (Error Err = TXTRecord::getData(TxtRecordPtr, CompleteData))
+ return std::move(Err);
+ assert(CompleteData.size() == TxtDataSize && "Wrong length of data");
+ std::copy(CompleteData.data(), CompleteData.data() + TxtDataSize,
+ Data.begin() + TxtDataOffset);
+ }
+ SectionDataCache[Sec.d.a] = Data;
+ return ArrayRef<uint8_t>(Data);
+}
+
+uint64_t GOFFObjectFile::getSectionAlignment(DataRefImpl Sec) const {
+ const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec);
+ GOFF::ESDAlignment Pow2Alignment;
+ ESDRecord::getAlignment(EsdRecord, Pow2Alignment);
+ return 1ULL << static_cast<uint64_t>(Pow2Alignment);
+}
+
+bool GOFFObjectFile::isSectionText(DataRefImpl Sec) const {
+ const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec);
+ GOFF::ESDExecutable Executable;
+ ESDRecord::getExecutable(EsdRecord, Executable);
+ return Executable == GOFF::ESD_EXE_CODE;
+}
+
+bool GOFFObjectFile::isSectionData(DataRefImpl Sec) const {
+ const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec);
+ GOFF::ESDExecutable Executable;
+ ESDRecord::getExecutable(EsdRecord, Executable);
+ return Executable == GOFF::ESD_EXE_DATA;
+}
+
+bool GOFFObjectFile::isSectionNoLoad(DataRefImpl Sec) const {
+ const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec);
+ GOFF::ESDLoadingBehavior LoadingBehavior;
+ ESDRecord::getLoadingBehavior(EsdRecord, LoadingBehavior);
+ return LoadingBehavior == GOFF::ESD_LB_NoLoad;
+}
+
+bool GOFFObjectFile::isSectionReadOnlyData(DataRefImpl Sec) const {
+ if (!isSectionData(Sec))
+ return false;
+
+ const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec);
+ GOFF::ESDLoadingBehavior LoadingBehavior;
+ ESDRecord::getLoadingBehavior(EsdRecord, LoadingBehavior);
+ return LoadingBehavior == GOFF::ESD_LB_Initial;
+}
+
+bool GOFFObjectFile::isSectionZeroInit(DataRefImpl Sec) const {
+ // GOFF uses fill characters and fill characters are applied
+ // on getSectionContents() - so we say false to zero init.
+ return false;
+}
+
section_iterator GOFFObjectFile::section_begin() const {
DataRefImpl Sec;
moveSectionNext(Sec);
@@ -476,6 +636,13 @@ Error ESDRecord::getData(const uint8_t *Record,
return getContinuousData(Record, DataSize, 72, CompleteData);
}
+Error TXTRecord::getData(const uint8_t *Record,
+ SmallString<256> &CompleteData) {
+ uint16_t Length;
+ getDataLength(Record, Length);
+ return getContinuousData(Record, Length, 24, CompleteData);
+}
+
Error ENDRecord::getData(const uint8_t *Record,
SmallString<256> &CompleteData) {
uint16_t Length = getNameLength(Record);
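(Aside, not part of the patch: a sketch of consuming the newly implemented GOFF section support through the generic llvm::object::ObjectFile interface; getSectionContents() stitches TXT records together and expands fill bytes, so callers see a contiguous buffer. The dumpSections helper and its minimal error handling are illustrative, not upstream code.)

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

static void dumpSections(StringRef Path) {
  ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(Path);
  if (!Buf)
    return;
  Expected<std::unique_ptr<ObjectFile>> Obj =
      ObjectFile::createObjectFile((*Buf)->getMemBufferRef());
  if (!Obj) {
    consumeError(Obj.takeError());
    return;
  }
  for (const SectionRef &Sec : (*Obj)->sections()) {
    Expected<StringRef> Name = Sec.getName();
    Expected<StringRef> Contents = Sec.getContents();
    if (!Name || !Contents) {
      if (!Name)
        consumeError(Name.takeError());
      if (!Contents)
        consumeError(Contents.takeError());
      continue;
    }
    outs() << *Name << ": " << Contents->size() << " bytes\n";
  }
}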
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 72e8b59e0a40..052b231d62a3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -22,6 +22,7 @@
#include "AMDKernelCodeT.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "R600AsmPrinter.h"
#include "SIMachineFunctionInfo.h"
@@ -428,38 +429,43 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
return KernelCodeProperties;
}
-amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
- const MachineFunction &MF,
- const SIProgramInfo &PI) const {
+MCKernelDescriptor
+AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
+ const SIProgramInfo &PI) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const Function &F = MF.getFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ MCContext &Ctx = MF.getContext();
- amdhsa::kernel_descriptor_t KernelDescriptor;
- memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
+ MCKernelDescriptor KernelDescriptor;
assert(isUInt<32>(PI.ScratchSize));
assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
assert(isUInt<32>(PI.getComputePGMRSrc2()));
- KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
- KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
+ KernelDescriptor.group_segment_fixed_size =
+ MCConstantExpr::create(PI.LDSSize, Ctx);
+ KernelDescriptor.private_segment_fixed_size =
+ MCConstantExpr::create(PI.ScratchSize, Ctx);
Align MaxKernArgAlign;
- KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+ KernelDescriptor.kernarg_size = MCConstantExpr::create(
+ STM.getKernArgSegmentSize(F, MaxKernArgAlign), Ctx);
- KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM);
- KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2();
- KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
+ KernelDescriptor.compute_pgm_rsrc1 =
+ MCConstantExpr::create(PI.getComputePGMRSrc1(STM), Ctx);
+ KernelDescriptor.compute_pgm_rsrc2 =
+ MCConstantExpr::create(PI.getComputePGMRSrc2(), Ctx);
+ KernelDescriptor.kernel_code_properties =
+ MCConstantExpr::create(getAmdhsaKernelCodeProperties(MF), Ctx);
assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
- if (STM.hasGFX90AInsts())
- KernelDescriptor.compute_pgm_rsrc3 =
- CurrentProgramInfo.ComputePGMRSrc3GFX90A;
+ KernelDescriptor.compute_pgm_rsrc3 = MCConstantExpr::create(
+ STM.hasGFX90AInsts() ? CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0, Ctx);
- if (AMDGPU::hasKernargPreload(STM))
- KernelDescriptor.kernarg_preload =
- static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs());
+ KernelDescriptor.kernarg_preload = MCConstantExpr::create(
+ AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
+ Ctx);
return KernelDescriptor;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 79326cd3d328..b8b2718d293e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -28,15 +28,12 @@ class MCCodeEmitter;
class MCOperand;
namespace AMDGPU {
+struct MCKernelDescriptor;
namespace HSAMD {
class MetadataStreamer;
}
} // namespace AMDGPU
-namespace amdhsa {
-struct kernel_descriptor_t;
-}
-
class AMDGPUAsmPrinter final : public AsmPrinter {
private:
unsigned CodeObjectVersion;
@@ -75,9 +72,9 @@ private:
uint16_t getAmdhsaKernelCodeProperties(
const MachineFunction &MF) const;
- amdhsa::kernel_descriptor_t getAmdhsaKernelDescriptor(
- const MachineFunction &MF,
- const SIProgramInfo &PI) const;
+ AMDGPU::MCKernelDescriptor
+ getAmdhsaKernelDescriptor(const MachineFunction &MF,
+ const SIProgramInfo &PI) const;
void initTargetStreamer(Module &M);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 84b4ccc1ae7b..5aa35becd842 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -657,6 +657,8 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
return true;
IRBuilder<> B(CI);
+ if (CI->isStrictFP())
+ B.setIsFPConstrained(true);
if (FPMathOperator *FPOp = dyn_cast<FPMathOperator>(CI)) {
// Under unsafe-math, evaluate calls if possible.
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 4648df199c74..294fc683fe92 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,7 @@
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
+#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
@@ -5417,7 +5418,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (getParser().parseIdentifier(KernelName))
return true;
- kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
+ AMDGPU::MCKernelDescriptor KD =
+ AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
+ &getSTI(), getContext());
StringSet<> Seen;
@@ -5457,89 +5460,111 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return TokError(".amdhsa_ directives cannot be repeated");
SMLoc ValStart = getLoc();
- int64_t IVal;
- if (getParser().parseAbsoluteExpression(IVal))
+ const MCExpr *ExprVal;
+ if (getParser().parseExpression(ExprVal))
return true;
SMLoc ValEnd = getLoc();
SMRange ValRange = SMRange(ValStart, ValEnd);
- if (IVal < 0)
- return OutOfRangeError(ValRange);
-
+ int64_t IVal = 0;
uint64_t Val = IVal;
+ bool EvaluatableExpr;
+ if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
+ if (IVal < 0)
+ return OutOfRangeError(ValRange);
+ Val = IVal;
+ }
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
- if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
+ if (!isUInt<ENTRY##_WIDTH>(Val)) \
return OutOfRangeError(RANGE); \
- AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
+ AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
+ getContext());
+
+// Some fields use the parsed value immediately, which requires the
+// expression to be resolvable.
+#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
+ if (!(RESOLVED)) \
+ return Error(IDRange.Start, "directive should have resolvable expression", \
+ IDRange);
if (ID == ".amdhsa_group_segment_fixed_size") {
- if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
+ if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
+ CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.group_segment_fixed_size = Val;
+ KD.group_segment_fixed_size = ExprVal;
} else if (ID == ".amdhsa_private_segment_fixed_size") {
- if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
+ if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
+ CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.private_segment_fixed_size = Val;
+ KD.private_segment_fixed_size = ExprVal;
} else if (ID == ".amdhsa_kernarg_size") {
- if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
+ if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.kernarg_size = Val;
+ KD.kernarg_size = ExprVal;
} else if (ID == ".amdhsa_user_sgpr_count") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
ExplicitUserSGPRCount = Val;
} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 4;
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
if (Val > getMaxNumUserSGPRs())
return OutOfRangeError(ValRange);
- PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
ValRange);
if (Val) {
ImpliedUserSGPRCount += Val;
PreloadLength = Val;
}
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
if (Val >= 1024)
return OutOfRangeError(ValRange);
- PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
ValRange);
if (Val)
PreloadOffset = Val;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
@@ -5548,34 +5573,39 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
- ValRange);
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 1;
} else if (ID == ".amdhsa_wavefront_size32") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
EnableWavefrontSize32 = Val;
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
- Val, ValRange);
+ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_uses_dynamic_stack") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
+ KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_enable_private_segment") {
if (!hasArchitectedFlatScratch())
return Error(
@@ -5583,42 +5613,48 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
"directive is not supported without architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_vgpr_workitem_id") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
ValRange);
} else if (ID == ".amdhsa_next_free_vgpr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
VGPRRange = ValRange;
NextFreeVGPR = Val;
} else if (ID == ".amdhsa_next_free_sgpr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
SGPRRange = ValRange;
NextFreeSGPR = Val;
} else if (ID == ".amdhsa_accum_offset") {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
AccumOffset = Val;
} else if (ID == ".amdhsa_reserve_vcc") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
ReserveVCC = Val;
} else if (ID == ".amdhsa_reserve_flat_scratch") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (IVersion.Major < 7)
return Error(IDRange.Start, "directive requires gfx7+", IDRange);
if (hasArchitectedFlatScratch())
@@ -5638,97 +5674,105 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
IDRange);
} else if (ID == ".amdhsa_float_round_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_round_mode_16_64") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_denorm_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_denorm_mode_16_64") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
ValRange);
} else if (ID == ".amdhsa_dx10_clamp") {
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
ValRange);
} else if (ID == ".amdhsa_ieee_mode") {
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
ValRange);
} else if (ID == ".amdhsa_fp16_overflow") {
if (IVersion.Major < 9)
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
ValRange);
} else if (ID == ".amdhsa_tg_split") {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
- ValRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_workgroup_processor_mode") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
ValRange);
} else if (ID == ".amdhsa_memory_ordered") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
ValRange);
} else if (ID == ".amdhsa_forward_progress") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
ValRange);
} else if (ID == ".amdhsa_shared_vgpr_count") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (IVersion.Major < 10 || IVersion.Major >= 12)
return Error(IDRange.Start, "directive requires gfx10 or gfx11",
IDRange);
SharedVGPRCount = Val;
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
- COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val,
+ COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
- ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_denorm_src") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
- ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_int_div_zero") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_round_robin_scheduling") {
if (IVersion.Major < 12)
return Error(IDRange.Start, "directive requires gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
+ COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
ValRange);
} else {
return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
@@ -5755,15 +5799,18 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
VGPRBlocks))
return OutOfRangeError(VGPRRange);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
+ AMDGPU::MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()),
+ COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
+ COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
SGPRBlocks))
return OutOfRangeError(SGPRRange);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
- SGPRBlocks);
+ AMDGPU::MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()),
+ COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
+ COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
return TokError("amdgpu_user_sgpr_count smaller than than implied by "
@@ -5774,11 +5821,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
return TokError("too many user SGPRs enabled");
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
- UserSGPRCount);
-
- if (PreloadLength && KD.kernarg_size &&
- (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
+ AMDGPU::MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
+ COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
+ COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
+
+ int64_t IVal = 0;
+ if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
+ return TokError("Kernarg size should be resolvable");
+ uint64_t kernarg_size = IVal;
+ if (PreloadLength && kernarg_size &&
+ (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
return TokError("Kernarg preload length + offset is larger than the "
"kernarg segment size");
@@ -5790,8 +5843,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
"increments of 4");
if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
return TokError("accum_offset exceeds total VGPR allocation");
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
- (AccumOffset / 4 - 1));
+ MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc3,
+ MCConstantExpr::create(AccumOffset / 4 - 1, getContext()),
+ COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
+ COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
}
if (IVersion.Major >= 10 && IVersion.Major < 12) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp
new file mode 100644
index 000000000000..77e7e30ff528
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp
@@ -0,0 +1,98 @@
+//===--- AMDGPUMCKernelDescriptor.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCKernelDescriptor.h"
+#include "AMDGPUMCTargetDesc.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/TargetParser/TargetParser.h"
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+
+MCKernelDescriptor
+MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI,
+ MCContext &Ctx) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+
+ MCKernelDescriptor KD;
+ const MCExpr *ZeroMCExpr = MCConstantExpr::create(0, Ctx);
+ const MCExpr *OneMCExpr = MCConstantExpr::create(1, Ctx);
+
+ KD.group_segment_fixed_size = ZeroMCExpr;
+ KD.private_segment_fixed_size = ZeroMCExpr;
+ KD.compute_pgm_rsrc1 = ZeroMCExpr;
+ KD.compute_pgm_rsrc2 = ZeroMCExpr;
+ KD.compute_pgm_rsrc3 = ZeroMCExpr;
+ KD.kernarg_size = ZeroMCExpr;
+ KD.kernel_code_properties = ZeroMCExpr;
+ KD.kernarg_preload = ZeroMCExpr;
+
+ MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc1,
+ MCConstantExpr::create(amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE, Ctx),
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Ctx);
+ if (Version.Major < 12) {
+ MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc1, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Ctx);
+ MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc1, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Ctx);
+ }
+ MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc2, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Ctx);
+ if (Version.Major >= 10) {
+ if (STI->getFeatureBits().test(FeatureWavefrontSize32))
+ MCKernelDescriptor::bits_set(
+ KD.kernel_code_properties, OneMCExpr,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, Ctx);
+ if (!STI->getFeatureBits().test(FeatureCuMode))
+ MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc1, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Ctx);
+
+ MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc1, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Ctx);
+ }
+ if (AMDGPU::isGFX90A(*STI) && STI->getFeatureBits().test(FeatureTgSplit))
+ MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc3, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx);
+ return KD;
+}
+
+void MCKernelDescriptor::bits_set(const MCExpr *&Dst, const MCExpr *Value,
+ uint32_t Shift, uint32_t Mask,
+ MCContext &Ctx) {
+ auto Sft = MCConstantExpr::create(Shift, Ctx);
+ auto Msk = MCConstantExpr::create(Mask, Ctx);
+ Dst = MCBinaryExpr::createAnd(Dst, MCUnaryExpr::createNot(Msk, Ctx), Ctx);
+ Dst = MCBinaryExpr::createOr(Dst, MCBinaryExpr::createShl(Value, Sft, Ctx),
+ Ctx);
+}
+
+const MCExpr *MCKernelDescriptor::bits_get(const MCExpr *Src, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx) {
+ auto Sft = MCConstantExpr::create(Shift, Ctx);
+ auto Msk = MCConstantExpr::create(Mask, Ctx);
+ return MCBinaryExpr::createLShr(MCBinaryExpr::createAnd(Src, Msk, Ctx), Sft,
+ Ctx);
+}
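
bits_set and bits_get defer the old AMDHSA_BITS_SET/AMDHSA_BITS_GET arithmetic into an MCExpr tree: AND with the inverted mask, OR in the shifted value, and mask-then-shift to read a field back. Once every operand is a constant, the tree folds to the same integers the macros produced. A plain-integer sketch of that arithmetic; the field constants below are invented for illustration, not amdhsa:: values:

// Bit-field read-modify-write that the MCExpr trees above encode.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Shift = 6;            // hypothetical field position
  const uint32_t Mask = 0x3u << Shift; // two-bit field at bits 7:6

  uint32_t Rsrc = 0xFFFFFFFFu;
  // bits_set: clear the field, then OR in the shifted value.
  Rsrc = (Rsrc & ~Mask) | (2u << Shift);
  // bits_get: mask the field out, then shift it back down.
  assert(((Rsrc & Mask) >> Shift) == 2u);
  return 0;
}
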
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h
new file mode 100644
index 000000000000..26958ac8b9ee
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h
@@ -0,0 +1,54 @@
+//===--- AMDGPUMCKernelDescriptor.h ---------------------------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// AMDHSA kernel descriptor MCExpr struct for use in the MC layer. Uses
+/// AMDHSAKernelDescriptor.h for sizes and constants.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELDESCRIPTOR_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELDESCRIPTOR_H
+
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
+
+namespace llvm {
+class MCExpr;
+class MCContext;
+class MCSubtargetInfo;
+namespace AMDGPU {
+
+struct MCKernelDescriptor {
+ const MCExpr *group_segment_fixed_size = nullptr;
+ const MCExpr *private_segment_fixed_size = nullptr;
+ const MCExpr *kernarg_size = nullptr;
+ const MCExpr *compute_pgm_rsrc3 = nullptr;
+ const MCExpr *compute_pgm_rsrc1 = nullptr;
+ const MCExpr *compute_pgm_rsrc2 = nullptr;
+ const MCExpr *kernel_code_properties = nullptr;
+ const MCExpr *kernarg_preload = nullptr;
+
+ static MCKernelDescriptor
+ getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx);
+ // MCExpr for:
+ // Dst = Dst & ~Mask
+ // Dst = Dst | (Value << Shift)
+ static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx);
+
+ // MCExpr for:
+ // return (Src & Mask) >> Shift
+ static const MCExpr *bits_get(const MCExpr *Src, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx);
+};
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELDESCRIPTOR_H
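
As a usage illustration only (the helper below is not part of the patch), a client holding an MCContext can set a single bit-field through the new interface; the call mirrors what getDefaultAmdhsaKernelDescriptor does for wave32 targets, and the include path assumes a file sitting alongside the AMDGPU MCTargetDesc sources:

// Illustrative helper: mark a descriptor as wave32 via bits_set.
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"

using namespace llvm;
using namespace llvm::AMDGPU;

static void enableWave32(MCKernelDescriptor &KD, MCContext &Ctx) {
  const MCExpr *One = MCConstantExpr::create(1, Ctx);
  MCKernelDescriptor::bits_set(
      KD.kernel_code_properties, One,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, Ctx);
}
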
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 4742b0b3e52e..3006fcdb9282 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetStreamer.h"
+#include "AMDGPUMCKernelDescriptor.h"
#include "AMDGPUPTNote.h"
#include "AMDKernelCodeT.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -307,94 +308,142 @@ bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
+ const MCKernelDescriptor &KD, uint64_t NextVGPR, uint64_t NextSGPR,
bool ReserveVCC, bool ReserveFlatScr) {
IsaVersion IVersion = getIsaVersion(STI.getCPU());
+ const MCAsmInfo *MAI = getContext().getAsmInfo();
OS << "\t.amdhsa_kernel " << KernelName << '\n';
-#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
- STREAM << "\t\t" << DIRECTIVE << " " \
- << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
-
- OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
- << '\n';
- OS << "\t\t.amdhsa_private_segment_fixed_size "
- << KD.private_segment_fixed_size << '\n';
- OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';
-
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_count", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT);
+ auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
+ StringRef Directive) {
+ int64_t IVal;
+ OS << "\t\t" << Directive << ' ';
+ const MCExpr *FieldBits =
+ MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext());
+ if (FieldBits->evaluateAsAbsolute(IVal))
+ OS << static_cast<uint64_t>(IVal);
+ else
+ FieldBits->print(OS, MAI);
+ OS << '\n';
+ };
+
+ OS << "\t\t.amdhsa_group_segment_fixed_size ";
+ KD.group_segment_fixed_size->print(OS, MAI);
+ OS << '\n';
+
+ OS << "\t\t.amdhsa_private_segment_fixed_size ";
+ KD.private_segment_fixed_size->print(OS, MAI);
+ OS << '\n';
+
+ OS << "\t\t.amdhsa_kernarg_size ";
+ KD.kernarg_size->print(OS, MAI);
+ OS << '\n';
+
+ PrintField(
+ KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");
if (!hasArchitectedFlatScratch(STI))
- PRINT_FIELD(
- OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+ PrintField(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
+ ".amdhsa_user_sgpr_private_segment_buffer");
+ PrintField(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
+ ".amdhsa_user_sgpr_dispatch_ptr");
+ PrintField(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
+ ".amdhsa_user_sgpr_queue_ptr");
+ PrintField(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
+ ".amdhsa_user_sgpr_kernarg_segment_ptr");
+ PrintField(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
+ ".amdhsa_user_sgpr_dispatch_id");
if (!hasArchitectedFlatScratch(STI))
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ PrintField(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
+ ".amdhsa_user_sgpr_flat_scratch_init");
if (hasKernargPreload(STI)) {
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_length ", KD,
- kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_offset ", KD,
- kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET);
+ PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
+ amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
+ ".amdhsa_user_sgpr_kernarg_preload_length");
+ PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
+ amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
+ ".amdhsa_user_sgpr_kernarg_preload_offset");
}
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ PrintField(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
+ ".amdhsa_user_sgpr_private_segment_size");
if (IVersion.Major >= 10)
- PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
+ PrintField(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+ ".amdhsa_wavefront_size32");
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
- PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
- PRINT_FIELD(OS,
- (hasArchitectedFlatScratch(STI)
- ? ".amdhsa_enable_private_segment"
- : ".amdhsa_system_sgpr_private_segment_wavefront_offset"),
- KD, compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
- PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
+ PrintField(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
+ ".amdhsa_uses_dynamic_stack");
+ PrintField(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
+ (hasArchitectedFlatScratch(STI)
+ ? ".amdhsa_enable_private_segment"
+ : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
+ PrintField(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
+ ".amdhsa_system_sgpr_workgroup_id_x");
+ PrintField(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
+ ".amdhsa_system_sgpr_workgroup_id_y");
+ PrintField(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
+ ".amdhsa_system_sgpr_workgroup_id_z");
+ PrintField(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
+ ".amdhsa_system_sgpr_workgroup_info");
+ PrintField(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
+ ".amdhsa_system_vgpr_workitem_id");
// These directives are required.
OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
- if (AMDGPU::isGFX90A(STI))
- OS << "\t\t.amdhsa_accum_offset " <<
- (AMDHSA_BITS_GET(KD.compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
- << '\n';
+ if (AMDGPU::isGFX90A(STI)) {
+ // MCExpr equivalent of computing (accum_offset + 1) * 4.
+ const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
+ KD.compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
+ accum_bits = MCBinaryExpr::createAdd(
+ accum_bits, MCConstantExpr::create(1, getContext()), getContext());
+ accum_bits = MCBinaryExpr::createMul(
+ accum_bits, MCConstantExpr::create(4, getContext()), getContext());
+ OS << "\t\t.amdhsa_accum_offset ";
+ int64_t IVal;
+ if (accum_bits->evaluateAsAbsolute(IVal)) {
+ OS << static_cast<uint64_t>(IVal);
+ } else {
+ accum_bits->print(OS, MAI);
+ }
+ OS << '\n';
+ }
if (!ReserveVCC)
OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
@@ -411,74 +460,105 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
break;
}
- PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
- PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
- PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
- PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
+ PrintField(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
+ ".amdhsa_float_round_mode_32");
+ PrintField(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
+ ".amdhsa_float_round_mode_16_64");
+ PrintField(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
+ ".amdhsa_float_denorm_mode_32");
+ PrintField(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
+ ".amdhsa_float_denorm_mode_16_64");
if (IVersion.Major < 12) {
- PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
- PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
+ PrintField(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
+ ".amdhsa_dx10_clamp");
+ PrintField(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
+ ".amdhsa_ieee_mode");
+ }
+ if (IVersion.Major >= 9) {
+ PrintField(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
+ ".amdhsa_fp16_overflow");
}
- if (IVersion.Major >= 9)
- PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
if (AMDGPU::isGFX90A(STI))
- PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
- compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
+ PrintField(KD.compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
if (IVersion.Major >= 10) {
- PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
- PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
- PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
+ PrintField(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
+ ".amdhsa_workgroup_processor_mode");
+ PrintField(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
+ ".amdhsa_memory_ordered");
+ PrintField(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
+ ".amdhsa_forward_progress");
}
if (IVersion.Major >= 10 && IVersion.Major < 12) {
- PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
+ PrintField(KD.compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
+ ".amdhsa_shared_vgpr_count");
}
- if (IVersion.Major >= 12)
- PRINT_FIELD(OS, ".amdhsa_round_robin_scheduling", KD, compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
- PRINT_FIELD(
- OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
- PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
- PRINT_FIELD(
- OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
- PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
- PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
- PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
- PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
-#undef PRINT_FIELD
+ if (IVersion.Major >= 12) {
+ PrintField(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
+ ".amdhsa_round_robin_scheduling");
+ }
+ PrintField(
+ KD.compute_pgm_rsrc2,
+ amdhsa::
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
+ ".amdhsa_exception_fp_ieee_invalid_op");
+ PrintField(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
+ ".amdhsa_exception_fp_denorm_src");
+ PrintField(
+ KD.compute_pgm_rsrc2,
+ amdhsa::
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
+ ".amdhsa_exception_fp_ieee_div_zero");
+ PrintField(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
+ ".amdhsa_exception_fp_ieee_overflow");
+ PrintField(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
+ ".amdhsa_exception_fp_ieee_underflow");
+ PrintField(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
+ ".amdhsa_exception_fp_ieee_inexact");
+ PrintField(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
+ ".amdhsa_exception_int_div_zero");
OS << "\t.end_amdhsa_kernel\n";
}
@@ -835,7 +915,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
+ const MCKernelDescriptor &KernelDescriptor, uint64_t NextVGPR,
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {
auto &Streamer = getStreamer();
auto &Context = Streamer.getContext();
@@ -853,7 +933,7 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
// Kernel descriptor symbol's type and size are fixed.
KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
KernelDescriptorSymbol->setSize(
- MCConstantExpr::create(sizeof(KernelDescriptor), Context));
+ MCConstantExpr::create(sizeof(amdhsa::kernel_descriptor_t), Context));
// The visibility of the kernel code symbol must be protected or less to allow
// static relocations from the kernel descriptor to be used.
@@ -861,31 +941,43 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
Streamer.emitLabel(KernelDescriptorSymbol);
- Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size);
- Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size);
- Streamer.emitInt32(KernelDescriptor.kernarg_size);
-
- for (uint8_t Res : KernelDescriptor.reserved0)
- Streamer.emitInt8(Res);
+ Streamer.emitValue(
+ KernelDescriptor.group_segment_fixed_size,
+ sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size));
+ Streamer.emitValue(
+ KernelDescriptor.private_segment_fixed_size,
+ sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size));
+ Streamer.emitValue(KernelDescriptor.kernarg_size,
+ sizeof(amdhsa::kernel_descriptor_t::kernarg_size));
+
+ for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i)
+ Streamer.emitInt8(0u);
// FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
// expression being created is:
// (start of kernel code) - (start of kernel descriptor)
// It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
- Streamer.emitValue(MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(
- KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
- MCSymbolRefExpr::create(
- KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
- Context),
- sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
- for (uint8_t Res : KernelDescriptor.reserved1)
- Streamer.emitInt8(Res);
- Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc3);
- Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1);
- Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2);
- Streamer.emitInt16(KernelDescriptor.kernel_code_properties);
- Streamer.emitInt16(KernelDescriptor.kernarg_preload);
- for (uint8_t Res : KernelDescriptor.reserved3)
- Streamer.emitInt8(Res);
+ Streamer.emitValue(
+ MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(KernelCodeSymbol,
+ MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
+ MCSymbolRefExpr::create(KernelDescriptorSymbol,
+ MCSymbolRefExpr::VK_None, Context),
+ Context),
+ sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset));
+ for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i)
+ Streamer.emitInt8(0u);
+ Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3,
+ sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3));
+ Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1,
+ sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1));
+ Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2,
+ sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2));
+ Streamer.emitValue(
+ KernelDescriptor.kernel_code_properties,
+ sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties));
+ Streamer.emitValue(KernelDescriptor.kernarg_preload,
+ sizeof(amdhsa::kernel_descriptor_t::kernarg_preload));
+ for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
+ Streamer.emitInt8(0u);
}
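
With the ELF path now emitting each field as an MCExpr of the matching kernel_descriptor_t member's width and writing the reserved ranges as explicit zero bytes, the emitted object keeps the struct's fixed layout even though no kernel_descriptor_t instance is built anymore. A small compile-time sanity check against the existing amdhsa:: definitions; the asserts are illustrative, not part of the patch:

// Layout sanity check for the field-by-field emission above.
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include <cstddef>

static_assert(sizeof(llvm::amdhsa::kernel_descriptor_t) == 64,
              "the descriptor the streamer reproduces is a fixed 64 bytes");
static_assert(offsetof(llvm::amdhsa::kernel_descriptor_t,
                       kernel_code_entry_byte_offset) == 16,
              "entry offset follows the three 32-bit sizes and reserved0");
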
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 5aa80ff578c6..706897a5dc1f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -22,15 +22,13 @@ class MCSymbol;
class formatted_raw_ostream;
namespace AMDGPU {
+
+struct MCKernelDescriptor;
namespace HSAMD {
struct Metadata;
}
} // namespace AMDGPU
-namespace amdhsa {
-struct kernel_descriptor_t;
-}
-
class AMDGPUTargetStreamer : public MCTargetStreamer {
AMDGPUPALMetadata PALMetadata;
@@ -94,10 +92,11 @@ public:
return true;
}
- virtual void EmitAmdhsaKernelDescriptor(
- const MCSubtargetInfo &STI, StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {}
+ virtual void
+ EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName,
+ const AMDGPU::MCKernelDescriptor &KernelDescriptor,
+ uint64_t NextVGPR, uint64_t NextSGPR,
+ bool ReserveVCC, bool ReserveFlatScr) {}
static StringRef getArchNameFromElfMach(unsigned ElfMach);
static unsigned getElfMach(StringRef GPU);
@@ -150,10 +149,11 @@ public:
bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI,
bool TrapEnabled) override;
- void EmitAmdhsaKernelDescriptor(
- const MCSubtargetInfo &STI, StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override;
+ void
+ EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName,
+ const AMDGPU::MCKernelDescriptor &KernelDescriptor,
+ uint64_t NextVGPR, uint64_t NextSGPR,
+ bool ReserveVCC, bool ReserveFlatScr) override;
};
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
@@ -205,10 +205,11 @@ public:
bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI,
bool TrapEnabled) override;
- void EmitAmdhsaKernelDescriptor(
- const MCSubtargetInfo &STI, StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override;
+ void
+ EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName,
+ const AMDGPU::MCKernelDescriptor &KernelDescriptor,
+ uint64_t NextVGPR, uint64_t NextSGPR,
+ bool ReserveVCC, bool ReserveFlatScr) override;
};
}
#endif
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 0842a58f794b..14a02b6d8e36 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_component_library(LLVMAMDGPUDesc
AMDGPUMCExpr.cpp
AMDGPUMCTargetDesc.cpp
AMDGPUTargetStreamer.cpp
+ AMDGPUMCKernelDescriptor.cpp
R600InstPrinter.cpp
R600MCCodeEmitter.cpp
R600MCTargetDesc.cpp
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 7bb84d78442b..5d44396b07b6 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1221,47 +1221,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
}
}
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
- const MCSubtargetInfo *STI) {
- IsaVersion Version = getIsaVersion(STI->getCPU());
-
- amdhsa::kernel_descriptor_t KD;
- memset(&KD, 0, sizeof(KD));
-
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
- amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
- if (Version.Major >= 12) {
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0);
- } else {
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1);
- }
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
- if (Version.Major >= 10) {
- AMDHSA_BITS_SET(KD.kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
- STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
- STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
- }
- if (AMDGPU::isGFX90A(*STI)) {
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
- STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
- }
- return KD;
-}
-
bool isGroupSegment(const GlobalValue *GV) {
return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f4f9a787100b..943588fe701c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -34,10 +34,6 @@ class StringRef;
class Triple;
class raw_ostream;
-namespace amdhsa {
-struct kernel_descriptor_t;
-}
-
namespace AMDGPU {
struct IsaVersion;
@@ -855,9 +851,6 @@ unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const MCSubtargetInfo *STI);
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
- const MCSubtargetInfo *STI);
-
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);
diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
index 5c113ccfdc15..e8d2cba7ee55 100644
--- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
+++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
@@ -958,6 +958,7 @@ bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
unsigned NotImm = ~Imm & 0xffff;
if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
+ MRI->clearKillFlags(LastVPTReg);
Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
if (MRI->use_empty(VPR)) {
DeadInstructions.insert(Copy);
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 0f4ece64bff5..047c6731333c 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -612,11 +612,11 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
static void findTemporariesForLR(const BitVector &GPRsNoLRSP,
const BitVector &PopFriendly,
- const LivePhysRegs &UsedRegs, unsigned &PopReg,
+ const LiveRegUnits &UsedRegs, unsigned &PopReg,
unsigned &TmpReg, MachineRegisterInfo &MRI) {
PopReg = TmpReg = 0;
for (auto Reg : GPRsNoLRSP.set_bits()) {
- if (UsedRegs.available(MRI, Reg)) {
+ if (UsedRegs.available(Reg)) {
// Remember the first pop-friendly register and exit.
if (PopFriendly.test(Reg)) {
PopReg = Reg;
@@ -684,7 +684,7 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
// Look for a temporary register to use.
// First, compute the liveness information.
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
- LivePhysRegs UsedRegs(TRI);
+ LiveRegUnits UsedRegs(TRI);
UsedRegs.addLiveOuts(MBB);
// The semantic of pristines changed recently and now,
// the callee-saved registers that are touched in the function
@@ -710,11 +710,6 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
unsigned TemporaryReg = 0;
BitVector PopFriendly =
TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID));
- // R7 may be used as a frame pointer, hence marked as not generally
- // allocatable, however there's no reason to not use it as a temporary for
- // restoring LR.
- if (STI.getFramePointerReg() == ARM::R7)
- PopFriendly.set(ARM::R7);
assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
// Rebuild the GPRs from the high registers because they are removed
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td
index 11b716f20f37..ad06f4774377 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.td
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td
@@ -26,6 +26,19 @@ def CSR_ILP32D_LP64D
: CalleeSavedRegs<(add CSR_ILP32_LP64,
F8_D, F9_D, (sequence "F%u_D", 18, 27))>;
+defvar CSR_V = (add (sequence "V%u", 1, 7), (sequence "V%u", 24, 31),
+ V2M2, V4M2, V6M2, V24M2, V26M2, V28M2, V30M2,
+ V4M4, V24M4, V28M4, V24M8);
+
+def CSR_ILP32_LP64_V
+ : CalleeSavedRegs<(add CSR_ILP32_LP64, CSR_V)>;
+
+def CSR_ILP32F_LP64F_V
+ : CalleeSavedRegs<(add CSR_ILP32F_LP64F, CSR_V)>;
+
+def CSR_ILP32D_LP64D_V
+ : CalleeSavedRegs<(add CSR_ILP32D_LP64D, CSR_V)>;
+
// Needed for implementation of RISCVRegisterInfo::getNoPreservedMask()
def CSR_NoRegs : CalleeSavedRegs<(add)>;
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 39f2b3f62a9a..39075c81b292 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -388,6 +388,21 @@ getUnmanagedCSI(const MachineFunction &MF,
return NonLibcallCSI;
}
+static SmallVector<CalleeSavedInfo, 8>
+getRVVCalleeSavedInfo(const MachineFunction &MF,
+ const std::vector<CalleeSavedInfo> &CSI) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ SmallVector<CalleeSavedInfo, 8> RVVCSI;
+
+ for (auto &CS : CSI) {
+ int FI = CS.getFrameIdx();
+ if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ RVVCSI.push_back(CS);
+ }
+
+ return RVVCSI;
+}
+
void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -590,6 +605,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// directives.
for (const auto &Entry : CSI) {
int FrameIdx = Entry.getFrameIdx();
+ if (FrameIdx >= 0 &&
+ MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
+ continue;
+
int64_t Offset = MFI.getObjectOffset(FrameIdx);
Register Reg = Entry.getReg();
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
@@ -726,7 +745,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
const auto &CSI = getUnmanagedCSI(MF, MFI.getCalleeSavedInfo());
- // Skip to before the restores of callee-saved registers
+ // Skip to before the restores of scalar callee-saved registers
// FIXME: assumes exactly one instruction is used to restore each
// callee-saved register.
auto LastFrameDestroy = MBBI;
@@ -1029,15 +1048,24 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
// Create a buffer of RVV objects to allocate.
SmallVector<int, 8> ObjectsToAllocate;
- for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
- unsigned StackID = MFI.getStackID(I);
- if (StackID != TargetStackID::ScalableVector)
- continue;
- if (MFI.isDeadObjectIndex(I))
- continue;
+ auto pushRVVObjects = [&](int FIBegin, int FIEnd) {
+ for (int I = FIBegin, E = FIEnd; I != E; ++I) {
+ unsigned StackID = MFI.getStackID(I);
+ if (StackID != TargetStackID::ScalableVector)
+ continue;
+ if (MFI.isDeadObjectIndex(I))
+ continue;
- ObjectsToAllocate.push_back(I);
- }
+ ObjectsToAllocate.push_back(I);
+ }
+ };
+  // First push the RVV callee-saved objects, then the remaining RVV stack objects.
+ std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
+ const auto &RVVCSI = getRVVCalleeSavedInfo(MF, CSI);
+ if (!RVVCSI.empty())
+ pushRVVObjects(RVVCSI[0].getFrameIdx(),
+ RVVCSI[RVVCSI.size() - 1].getFrameIdx() + 1);
+ pushRVVObjects(0, MFI.getObjectIndexEnd() - RVVCSI.size());
// The minimum alignment is 16 bytes.
Align RVVStackAlign(16);
@@ -1487,13 +1515,19 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
// Manually spill values not spilled by libcall & Push/Pop.
const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI);
- for (auto &CS : UnmanagedCSI) {
- // Insert the spill to the stack frame.
- Register Reg = CS.getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
- RC, TRI, Register());
- }
+ const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI);
+
+ auto storeRegToStackSlot = [&](decltype(UnmanagedCSI) CSInfo) {
+ for (auto &CS : CSInfo) {
+ // Insert the spill to the stack frame.
+ Register Reg = CS.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg),
+ CS.getFrameIdx(), RC, TRI, Register());
+ }
+ };
+ storeRegToStackSlot(UnmanagedCSI);
+ storeRegToStackSlot(RVVCSI);
return true;
}
@@ -1511,19 +1545,26 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
DL = MI->getDebugLoc();
// Manually restore values not restored by libcall & Push/Pop.
- // Keep the same order as in the prologue. There is no need to reverse the
- // order in the epilogue. In addition, the return address will be restored
- // first in the epilogue. It increases the opportunity to avoid the
- // load-to-use data hazard between loading RA and return by RA.
- // loadRegFromStackSlot can insert multiple instructions.
+  // Reverse the restore order in the epilogue. In addition, the return
+  // address is restored first in the epilogue, which increases the
+  // opportunity to avoid the load-to-use data hazard between loading RA
+  // and returning through RA. loadRegFromStackSlot can insert multiple
+  // instructions.
const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI);
- for (auto &CS : UnmanagedCSI) {
- Register Reg = CS.getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
- Register());
- assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
- }
+ const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI);
+
+ auto loadRegFromStackSlot = [&](decltype(UnmanagedCSI) CSInfo) {
+ for (auto &CS : CSInfo) {
+ Register Reg = CS.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
+ Register());
+ assert(MI != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ }
+ };
+ loadRegFromStackSlot(RVVCSI);
+ loadRegFromStackSlot(UnmanagedCSI);
RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
if (RVFI->isPushable(*MF)) {
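The RISC-V frame lowering above now partitions callee-saved entries into scalar ones and RVV ones (scalable-vector stack ID), spills scalars before RVV registers in the prologue, and restores the two groups in the opposite order in the epilogue. A standalone sketch of that grouping and ordering, with toy register names standing in for the real CalleeSavedInfo entries:

// Standalone sketch (illustrative): partition callee-saved entries by stack
// kind, spill scalars then vectors, and reload the two groups in reverse
// order, mirroring the structure of the hunks above.
#include <iostream>
#include <string>
#include <vector>

struct CSEntry { std::string Reg; bool ScalableVector; };

int main() {
  std::vector<CSEntry> CSI = {{"s0", false}, {"ra", false},
                              {"v1", true},  {"v2", true}};

  std::vector<CSEntry> Scalar, RVV;
  for (const CSEntry &CS : CSI)
    (CS.ScalableVector ? RVV : Scalar).push_back(CS);  // cf. getRVVCalleeSavedInfo

  auto emit = [](const char *What, const std::vector<CSEntry> &Group) {
    for (const CSEntry &CS : Group)
      std::cout << What << ' ' << CS.Reg << '\n';
  };

  emit("spill", Scalar);   // prologue: scalar CSRs first...
  emit("spill", RVV);      // ...then RVV CSRs
  emit("reload", RVV);     // epilogue: groups in reverse order
  emit("reload", Scalar);
  return 0;
}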
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ca78648c6aa9..564fda674317 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -18724,6 +18724,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
case CallingConv::Fast:
case CallingConv::SPIR_KERNEL:
case CallingConv::GRAAL:
+ case CallingConv::RISCV_VectorCall:
break;
case CallingConv::GHC:
if (Subtarget.isRVE())
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 74d65324b95d..11c3f2d57eb0 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -71,6 +71,9 @@ RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
: CSR_Interrupt_SaveList;
}
+ bool HasVectorCSR =
+ MF->getFunction().getCallingConv() == CallingConv::RISCV_VectorCall;
+
switch (Subtarget.getTargetABI()) {
default:
llvm_unreachable("Unrecognized ABI");
@@ -79,12 +82,18 @@ RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_ILP32E_LP64E_SaveList;
case RISCVABI::ABI_ILP32:
case RISCVABI::ABI_LP64:
+ if (HasVectorCSR)
+ return CSR_ILP32_LP64_V_SaveList;
return CSR_ILP32_LP64_SaveList;
case RISCVABI::ABI_ILP32F:
case RISCVABI::ABI_LP64F:
+ if (HasVectorCSR)
+ return CSR_ILP32F_LP64F_V_SaveList;
return CSR_ILP32F_LP64F_SaveList;
case RISCVABI::ABI_ILP32D:
case RISCVABI::ABI_LP64D:
+ if (HasVectorCSR)
+ return CSR_ILP32D_LP64D_V_SaveList;
return CSR_ILP32D_LP64D_SaveList;
}
}
@@ -665,12 +674,18 @@ RISCVRegisterInfo::getCallPreservedMask(const MachineFunction & MF,
return CSR_ILP32E_LP64E_RegMask;
case RISCVABI::ABI_ILP32:
case RISCVABI::ABI_LP64:
+ if (CC == CallingConv::RISCV_VectorCall)
+ return CSR_ILP32_LP64_V_RegMask;
return CSR_ILP32_LP64_RegMask;
case RISCVABI::ABI_ILP32F:
case RISCVABI::ABI_LP64F:
+ if (CC == CallingConv::RISCV_VectorCall)
+ return CSR_ILP32F_LP64F_V_RegMask;
return CSR_ILP32F_LP64F_RegMask;
case RISCVABI::ABI_ILP32D:
case RISCVABI::ABI_LP64D:
+ if (CC == CallingConv::RISCV_VectorCall)
+ return CSR_ILP32D_LP64D_V_RegMask;
return CSR_ILP32D_LP64D_RegMask;
}
}
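With the RISCV_VectorCall calling convention, getCalleeSavedRegs and getCallPreservedMask above hand back the _V save lists and regmasks, i.e. the base ABI lists extended by the CSR_V registers defined in RISCVCallingConv.td. A standalone sketch of the two-level (ABI, calling convention) selection; the returned names are the TableGen symbols, the dispatch itself is only illustrative:

// Standalone sketch (illustrative): the "_V" variants differ from the base
// ABI lists only by the extra vector callee-saved registers.
#include <cassert>
#include <string>

enum class ABI { ILP32_LP64, ILP32F_LP64F, ILP32D_LP64D };

static std::string saveList(ABI A, bool VectorCall) {
  std::string Base;
  switch (A) {
  case ABI::ILP32_LP64:   Base = "CSR_ILP32_LP64"; break;
  case ABI::ILP32F_LP64F: Base = "CSR_ILP32F_LP64F"; break;
  case ABI::ILP32D_LP64D: Base = "CSR_ILP32D_LP64D"; break;
  }
  return VectorCall ? Base + "_V" : Base;  // vector CC picks the extended list
}

int main() {
  assert(saveList(ABI::ILP32D_LP64D, true) == "CSR_ILP32D_LP64D_V");
  assert(saveList(ABI::ILP32_LP64, false) == "CSR_ILP32_LP64");
  return 0;
}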
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 000d01b8366c..38cdf3c47c64 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -909,23 +909,33 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
if (!IsTypeLegal)
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+ std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);
+
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- // FIXME: Need to consider vsetvli and lmul.
int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) -
(int)Log2_32(Src->getScalarSizeInBits());
switch (ISD) {
case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- if (Src->getScalarSizeInBits() == 1) {
+ case ISD::ZERO_EXTEND: {
+ const unsigned SrcEltSize = Src->getScalarSizeInBits();
+ if (SrcEltSize == 1) {
// We do not use vsext/vzext to extend from mask vector.
// Instead we use the following instructions to extend from mask vector:
// vmv.v.i v8, 0
// vmerge.vim v8, v8, -1, v0
- return 2;
+ return getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM},
+ DstLT.second, CostKind);
}
- return 1;
+ if ((PowDiff < 1) || (PowDiff > 3))
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+ unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
+ unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
+ unsigned Op =
+ (ISD == ISD::SIGN_EXTEND) ? SExtOp[PowDiff - 1] : ZExtOp[PowDiff - 1];
+ return getRISCVInstructionCost(Op, DstLT.second, CostKind);
+ }
case ISD::TRUNCATE:
if (Dst->getScalarSizeInBits() == 1) {
// We do not use several vncvt to truncate to mask vector. So we could
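The cost-model hunk above derives PowDiff from the element-size ratio and maps the values 1, 2, 3 to the vsext.vf2/vf4/vf8 (or vzext) forms, then prices that opcode for the legalized destination type, which is why the test updates below grow with destination LMUL. A standalone sketch of the opcode selection only; the per-opcode, per-type costs come from getRISCVInstructionCost and are not modelled here:

// Standalone sketch (illustrative): pick the extend variant from the
// element-size ratio, as in the hunk above.
#include <cassert>
#include <cmath>
#include <string>

static std::string extOpcode(unsigned SrcBits, unsigned DstBits, bool Signed) {
  int PowDiff = (int)std::log2(DstBits) - (int)std::log2(SrcBits);
  assert(PowDiff >= 1 && PowDiff <= 3 && "handled by the generic path otherwise");
  const char *SExt[] = {"vsext.vf2", "vsext.vf4", "vsext.vf8"};
  const char *ZExt[] = {"vzext.vf2", "vzext.vf4", "vzext.vf8"};
  return (Signed ? SExt : ZExt)[PowDiff - 1];
}

int main() {
  assert(extOpcode(8, 64, /*Signed=*/true) == "vsext.vf8");   // i8 -> i64
  assert(extOpcode(16, 32, /*Signed=*/false) == "vzext.vf2"); // i16 -> i32
  return 0;
}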
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9bad38f97c6a..9d98d31b31df 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43995,6 +43995,50 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
Extract->getOperand(1));
}
+// If this extract is from a loaded vector value and will be used as an
+// integer, that requires a potentially expensive XMM -> GPR transfer.
+// Additionally, if we can convert to a scalar integer load, that will likely
+// be folded into a subsequent integer op.
+// Note: SrcVec might not have a VecVT type, but it must be the same size.
+// Note: Unlike the related fold for this in DAGCombiner, this is not limited
+// to a single-use of the loaded vector. For the reasons above, we
+// expect this to be profitable even if it creates an extra load.
+static SDValue
+combineExtractFromVectorLoad(SDNode *N, EVT VecVT, SDValue SrcVec, uint64_t Idx,
+ const SDLoc &dl, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ "Only EXTRACT_VECTOR_ELT supported so far");
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = N->getValueType(0);
+
+ bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) {
+ return Use->getOpcode() == ISD::STORE ||
+ Use->getOpcode() == ISD::INSERT_VECTOR_ELT ||
+ Use->getOpcode() == ISD::SCALAR_TO_VECTOR;
+ });
+
+ auto *LoadVec = dyn_cast<LoadSDNode>(SrcVec);
+ if (LoadVec && ISD::isNormalLoad(LoadVec) && VT.isInteger() &&
+ VecVT.getVectorElementType() == VT &&
+ VecVT.getSizeInBits() == SrcVec.getValueSizeInBits() &&
+ DCI.isAfterLegalizeDAG() && !LikelyUsedAsVector && LoadVec->isSimple()) {
+ SDValue NewPtr = TLI.getVectorElementPointer(
+ DAG, LoadVec->getBasePtr(), VecVT, DAG.getVectorIdxConstant(Idx, dl));
+ unsigned PtrOff = VT.getSizeInBits() * Idx / 8;
+ MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff);
+ Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff);
+ SDValue Load =
+ DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,
+ LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());
+ DAG.makeEquivalentMemoryOrdering(LoadVec, Load);
+ return Load;
+ }
+
+ return SDValue();
+}
+
// Attempt to peek through a target shuffle and extract the scalar from the
// source.
static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
@@ -44191,6 +44235,11 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
if (SDValue V = GetLegalExtract(SrcOp, ExtractVT, ExtractIdx))
return DAG.getZExtOrTrunc(V, dl, VT);
+ if (N->getOpcode() == ISD::EXTRACT_VECTOR_ELT && ExtractVT == SrcVT)
+ if (SDValue V = combineExtractFromVectorLoad(
+ N, SrcVT, peekThroughBitcasts(SrcOp), ExtractIdx, dl, DAG, DCI))
+ return V;
+
return SDValue();
}
@@ -44600,6 +44649,12 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget))
return V;
+ if (CIdx)
+ if (SDValue V = combineExtractFromVectorLoad(
+ N, InputVector.getValueType(), InputVector, CIdx->getZExtValue(),
+ dl, DAG, DCI))
+ return V;
+
// Attempt to extract a i1 element by using MOVMSK to extract the signbits
// and then testing the relevant element.
//
@@ -44645,34 +44700,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
}
}
- // If this extract is from a loaded vector value and will be used as an
- // integer, that requires a potentially expensive XMM -> GPR transfer.
- // Additionally, if we can convert to a scalar integer load, that will likely
- // be folded into a subsequent integer op.
- // Note: Unlike the related fold for this in DAGCombiner, this is not limited
- // to a single-use of the loaded vector. For the reasons above, we
- // expect this to be profitable even if it creates an extra load.
- bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) {
- return Use->getOpcode() == ISD::STORE ||
- Use->getOpcode() == ISD::INSERT_VECTOR_ELT ||
- Use->getOpcode() == ISD::SCALAR_TO_VECTOR;
- });
- auto *LoadVec = dyn_cast<LoadSDNode>(InputVector);
- if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() &&
- SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() &&
- !LikelyUsedAsVector && LoadVec->isSimple()) {
- SDValue NewPtr =
- TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx);
- unsigned PtrOff = VT.getSizeInBits() * CIdx->getZExtValue() / 8;
- MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff);
- Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff);
- SDValue Load =
- DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,
- LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());
- DAG.makeEquivalentMemoryOrdering(LoadVec, Load);
- return Load;
- }
-
return SDValue();
}
@@ -48273,7 +48300,7 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG,
// We do not split for SSE at all, but we need to split vectors for AVX1 and
// AVX2.
- if (!Subtarget.useAVX512Regs() && VT.is512BitVector() &&
+ if (!Subtarget.useAVX512Regs() && VT.is512BitVector() &&
TLI.isTypeLegal(VT.getHalfNumVectorElementsVT(*DAG.getContext()))) {
SDValue LoX, HiX;
std::tie(LoX, HiX) = splitVector(X, DAG, DL);
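combineExtractFromVectorLoad above narrows an extract of a loaded vector element into a scalar load at a byte offset of element-size * index / 8, avoiding the XMM-to-GPR transfer; moving it into a helper lets both extract combines reuse it. A standalone sketch of just the offset arithmetic (the real code additionally checks legalization state, simple loads, and whether other uses still want the vector):

// Standalone sketch (illustrative): the byte offset used for the narrowed
// scalar load, matching "PtrOff = VT.getSizeInBits() * Idx / 8" above.
#include <cassert>
#include <cstdint>

static uint64_t scalarLoadOffset(uint64_t EltBits, uint64_t Idx) {
  return EltBits * Idx / 8;  // element index -> byte offset from the vector base
}

int main() {
  // extractelement (<4 x i32> loaded from P), 2  ==>  i32 load from P + 8
  assert(scalarLoadOffset(32, 2) == 8);
  // extractelement (<2 x i64> loaded from P), 1  ==>  i64 load from P + 8
  assert(scalarLoadOffset(64, 1) == 8);
  return 0;
}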
diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp
index e36832f563ee..71099462d5ec 100644
--- a/llvm/lib/TargetParser/AArch64TargetParser.cpp
+++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp
@@ -186,11 +186,6 @@ void AArch64::ExtensionSet::enable(ArchExtKind E) {
// Special cases for dependencies which vary depending on the base
// architecture version.
if (BaseArch) {
- // +sve implies +f32mm if the base architecture is v8.6A+ or v9.1A+
- // It isn't the case in general that sve implies both f64mm and f32mm
- if (E == AEK_SVE && BaseArch->is_superset(ARMV8_6A))
- enable(AEK_F32MM);
-
// +fp16 implies +fp16fml for v8.4A+, but not v9.0-A+
if (E == AEK_FP16 && BaseArch->is_superset(ARMV8_4A) &&
!BaseArch->is_superset(ARMV9A))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index af238a43b11a..8c698e52b5a0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -611,6 +611,18 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
Y->getType() == Z->getType())
return createPowiExpr(I, *this, X, Y, Z);
+ // powi(X, Y) / X --> powi(X, Y-1)
+  // This is legal when (Y - 1) can't wrap around; reassoc and nnan are also
+  // required.
+  // TODO: Multi-use may also be better off creating powi(X, Y - 1)
+ if (I.hasAllowReassoc() && I.hasNoNaNs() &&
+ match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
+ m_Specific(Op1), m_Value(Y))))) &&
+ willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
+ Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
+ return createPowiExpr(I, *this, Op1, Y, NegOne);
+ }
+
return nullptr;
}
@@ -1904,20 +1916,8 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
return replaceInstUsesWith(I, Pow);
}
- // powi(X, Y) / X --> powi(X, Y-1)
- // This is legal when (Y - 1) can't wraparound, in which case reassoc and nnan
- // are required.
- // TODO: Multi-use may be also better off creating Powi(x,y-1)
- if (I.hasAllowReassoc() && I.hasNoNaNs() &&
- match(Op0, m_OneUse(m_Intrinsic<Intrinsic::powi>(m_Specific(Op1),
- m_Value(Y)))) &&
- willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
- Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
- Value *Y1 = Builder.CreateAdd(Y, NegOne);
- Type *Types[] = {Op1->getType(), Y1->getType()};
- Value *Pow = Builder.CreateIntrinsic(Intrinsic::powi, Types, {Op1, Y1}, &I);
- return replaceInstUsesWith(I, Pow);
- }
+ if (Instruction *FoldedPowi = foldPowiReassoc(I))
+ return FoldedPowi;
return nullptr;
}
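The fold moved into foldPowiReassoc above rewrites powi(X, Y) / X to powi(X, Y - 1) when Y - 1 cannot wrap and reassoc/nnan are set; numerically it is just x^y / x = x^(y-1). A standalone check of that identity using std::pow (it ignores the NaN and wrap caveats the fast-math flags guard against):

// Standalone check (illustrative): the identity behind the fold,
// powi(X, Y) / X == powi(X, Y - 1), here evaluated with std::pow.
#include <cassert>
#include <cmath>

int main() {
  double X = 1.5;
  int Y = 7;
  double LHS = std::pow(X, Y) / X;
  double RHS = std::pow(X, Y - 1);
  assert(std::fabs(LHS - RHS) < 1e-12);
  return 0;
}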
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fbf1cb6a976f..e1f26b922dbe 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11926,7 +11926,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
Value *Vec = vectorizeOperand(E, I, /*PostponedPHIs=*/true);
if (VecTy != Vec->getType()) {
- assert((getOperandEntry(E, I)->State == TreeEntry::NeedToGather ||
+ assert((It != MinBWs.end() ||
+ getOperandEntry(E, I)->State == TreeEntry::NeedToGather ||
MinBWs.contains(getOperandEntry(E, I))) &&
"Expected item in MinBWs.");
Vec = Builder.CreateIntCast(Vec, VecTy, GetOperandSignedness(I));
diff --git a/llvm/test/Analysis/CostModel/RISCV/cast.ll b/llvm/test/Analysis/CostModel/RISCV/cast.ll
index bd26c19c2f2c..14da9a3f79d7 100644
--- a/llvm/test/Analysis/CostModel/RISCV/cast.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/cast.ll
@@ -16,74 +16,74 @@ define void @sext() {
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i64 = sext <2 x i1> undef to <2 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i16 = sext <4 x i8> undef to <4 x i16>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i32 = sext <4 x i8> undef to <4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i64 = sext <4 x i8> undef to <4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i64 = sext <4 x i8> undef to <4 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i32 = sext <4 x i16> undef to <4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i64 = sext <4 x i16> undef to <4 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4i64 = sext <4 x i32> undef to <4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i64 = sext <4 x i16> undef to <4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i64 = sext <4 x i32> undef to <4 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i8 = sext <4 x i1> undef to <4 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i16 = sext <4 x i1> undef to <4 x i16>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i32 = sext <4 x i1> undef to <4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i64 = sext <4 x i1> undef to <4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4i64 = sext <4 x i1> undef to <4 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i16 = sext <8 x i8> undef to <8 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i32 = sext <8 x i8> undef to <8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i64 = sext <8 x i8> undef to <8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i32 = sext <8 x i16> undef to <8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i64 = sext <8 x i16> undef to <8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8i64 = sext <8 x i32> undef to <8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i32 = sext <8 x i8> undef to <8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_v8i64 = sext <8 x i8> undef to <8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i32 = sext <8 x i16> undef to <8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_v8i64 = sext <8 x i16> undef to <8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i64 = sext <8 x i32> undef to <8 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i8 = sext <8 x i1> undef to <8 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i16 = sext <8 x i1> undef to <8 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i32 = sext <8 x i1> undef to <8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i64 = sext <8 x i1> undef to <8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i16 = sext <16 x i8> undef to <16 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i32 = sext <16 x i8> undef to <16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i64 = sext <16 x i8> undef to <16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i32 = sext <16 x i16> undef to <16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i64 = sext <16 x i16> undef to <16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16i64 = sext <16 x i32> undef to <16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8i32 = sext <8 x i1> undef to <8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8i64 = sext <8 x i1> undef to <8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16i16 = sext <16 x i8> undef to <16 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_v16i32 = sext <16 x i8> undef to <16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_v16i64 = sext <16 x i8> undef to <16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_v16i32 = sext <16 x i16> undef to <16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_v16i64 = sext <16 x i16> undef to <16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32_v16i64 = sext <16 x i32> undef to <16 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i8 = sext <16 x i1> undef to <16 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i16 = sext <16 x i1> undef to <16 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i32 = sext <16 x i1> undef to <16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i64 = sext <16 x i1> undef to <16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i16 = sext <32 x i8> undef to <32 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i32 = sext <32 x i8> undef to <32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_v32i64 = sext <32 x i8> undef to <32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32i32 = sext <32 x i16> undef to <32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_v32i64 = sext <32 x i16> undef to <32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i64 = sext <32 x i32> undef to <32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i8 = sext <32 x i1> undef to <32 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i16 = sext <32 x i1> undef to <32 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i32 = sext <32 x i1> undef to <32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_v32i64 = sext <32 x i1> undef to <32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64i16 = sext <64 x i8> undef to <64 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_v64i32 = sext <64 x i8> undef to <64 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i8_v64i64 = sext <64 x i8> undef to <64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i16_v64i32 = sext <64 x i16> undef to <64 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i16_v64i64 = sext <64 x i16> undef to <64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i32_v64i64 = sext <64 x i32> undef to <64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i8 = sext <64 x i1> undef to <64 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i16 = sext <64 x i1> undef to <64 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i1_v64i32 = sext <64 x i1> undef to <64 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i1_v64i64 = sext <64 x i1> undef to <64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128i16 = sext <128 x i8> undef to <128 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i8_v128i32 = sext <128 x i8> undef to <128 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128i8_v128i64 = sext <128 x i8> undef to <128 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i16_v128i32 = sext <128 x i16> undef to <128 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v128i16_v128i64 = sext <128 x i16> undef to <128 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i32_v128i64 = sext <128 x i32> undef to <128 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i1_v128i8 = sext <128 x i1> undef to <128 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v128i1_v128i16 = sext <128 x i1> undef to <128 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i1_v128i32 = sext <128 x i1> undef to <128 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i1_v128i64 = sext <128 x i1> undef to <128 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i8_v256i16 = sext <256 x i8> undef to <256 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v256i8_v256i32 = sext <256 x i8> undef to <256 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v256i8_v256i64 = sext <256 x i8> undef to <256 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v256i16_v256i32 = sext <256 x i16> undef to <256 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v256i16_v256i64 = sext <256 x i16> undef to <256 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v256i32_v256i64 = sext <256 x i32> undef to <256 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v256i1_v256i8 = sext <256 x i1> undef to <256 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v256i1_v256i16 = sext <256 x i1> undef to <256 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i1_v256i32 = sext <256 x i1> undef to <256 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v256i1_v256i64 = sext <256 x i1> undef to <256 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_v16i16 = sext <16 x i1> undef to <16 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16i32 = sext <16 x i1> undef to <16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16i64 = sext <16 x i1> undef to <16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_v32i16 = sext <32 x i8> undef to <32 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_v32i32 = sext <32 x i8> undef to <32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i8_v32i64 = sext <32 x i8> undef to <32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16_v32i32 = sext <32 x i16> undef to <32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i16_v32i64 = sext <32 x i16> undef to <32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i32_v32i64 = sext <32 x i32> undef to <32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_v32i8 = sext <32 x i1> undef to <32 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_v32i16 = sext <32 x i1> undef to <32 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32i32 = sext <32 x i1> undef to <32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32i64 = sext <32 x i1> undef to <32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i16 = sext <64 x i8> undef to <64 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i8_v64i32 = sext <64 x i8> undef to <64 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i8_v64i64 = sext <64 x i8> undef to <64 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i16_v64i32 = sext <64 x i16> undef to <64 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i16_v64i64 = sext <64 x i16> undef to <64 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v64i32_v64i64 = sext <64 x i32> undef to <64 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i1_v64i8 = sext <64 x i1> undef to <64 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i1_v64i16 = sext <64 x i1> undef to <64 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64i32 = sext <64 x i1> undef to <64 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64i64 = sext <64 x i1> undef to <64 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v128i8_v128i16 = sext <128 x i8> undef to <128 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v128i8_v128i32 = sext <128 x i8> undef to <128 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %v128i8_v128i64 = sext <128 x i8> undef to <128 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v128i16_v128i32 = sext <128 x i16> undef to <128 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v128i16_v128i64 = sext <128 x i16> undef to <128 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v128i32_v128i64 = sext <128 x i32> undef to <128 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v128i1_v128i8 = sext <128 x i1> undef to <128 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v128i1_v128i16 = sext <128 x i1> undef to <128 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128i32 = sext <128 x i1> undef to <128 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128i64 = sext <128 x i1> undef to <128 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v256i8_v256i16 = sext <256 x i8> undef to <256 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v256i8_v256i32 = sext <256 x i8> undef to <256 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %v256i8_v256i64 = sext <256 x i8> undef to <256 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v256i16_v256i32 = sext <256 x i16> undef to <256 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v256i16_v256i64 = sext <256 x i16> undef to <256 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v256i32_v256i64 = sext <256 x i32> undef to <256 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v256i1_v256i8 = sext <256 x i1> undef to <256 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v256i1_v256i16 = sext <256 x i1> undef to <256 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v256i1_v256i32 = sext <256 x i1> undef to <256 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %v256i1_v256i64 = sext <256 x i1> undef to <256 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i16 = sext <vscale x 1 x i8> undef to <vscale x 1 x i16>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i32 = sext <vscale x 1 x i8> undef to <vscale x 1 x i32>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i64 = sext <vscale x 1 x i8> undef to <vscale x 1 x i64>
@@ -96,73 +96,73 @@ define void @sext() {
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i64 = sext <vscale x 1 x i1> undef to <vscale x 1 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i16 = sext <vscale x 2 x i8> undef to <vscale x 2 x i16>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i32 = sext <vscale x 2 x i8> undef to <vscale x 2 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i64 = sext <vscale x 2 x i8> undef to <vscale x 2 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i64 = sext <vscale x 2 x i8> undef to <vscale x 2 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i32 = sext <vscale x 2 x i16> undef to <vscale x 2 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i64 = sext <vscale x 2 x i16> undef to <vscale x 2 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2i64 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i64 = sext <vscale x 2 x i16> undef to <vscale x 2 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i64 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i8 = sext <vscale x 2 x i1> undef to <vscale x 2 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i16 = sext <vscale x 2 x i1> undef to <vscale x 2 x i16>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i32 = sext <vscale x 2 x i1> undef to <vscale x 2 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i64 = sext <vscale x 2 x i1> undef to <vscale x 2 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2i64 = sext <vscale x 2 x i1> undef to <vscale x 2 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i16 = sext <vscale x 4 x i8> undef to <vscale x 4 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i32 = sext <vscale x 4 x i8> undef to <vscale x 4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i64 = sext <vscale x 4 x i8> undef to <vscale x 4 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i32 = sext <vscale x 4 x i16> undef to <vscale x 4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i64 = sext <vscale x 4 x i16> undef to <vscale x 4 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4i64 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i32 = sext <vscale x 4 x i8> undef to <vscale x 4 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i8_nxv4i64 = sext <vscale x 4 x i8> undef to <vscale x 4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4i32 = sext <vscale x 4 x i16> undef to <vscale x 4 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i16_nxv4i64 = sext <vscale x 4 x i16> undef to <vscale x 4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i64 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i8 = sext <vscale x 4 x i1> undef to <vscale x 4 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i16 = sext <vscale x 4 x i1> undef to <vscale x 4 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i32 = sext <vscale x 4 x i1> undef to <vscale x 4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i64 = sext <vscale x 4 x i1> undef to <vscale x 4 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i16 = sext <vscale x 8 x i8> undef to <vscale x 8 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i32 = sext <vscale x 8 x i8> undef to <vscale x 8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i64 = sext <vscale x 8 x i8> undef to <vscale x 8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i32 = sext <vscale x 8 x i16> undef to <vscale x 8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i64 = sext <vscale x 8 x i16> undef to <vscale x 8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8i64 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4i32 = sext <vscale x 4 x i1> undef to <vscale x 4 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4i64 = sext <vscale x 4 x i1> undef to <vscale x 4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i16 = sext <vscale x 8 x i8> undef to <vscale x 8 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i8_nxv8i32 = sext <vscale x 8 x i8> undef to <vscale x 8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8_nxv8i64 = sext <vscale x 8 x i8> undef to <vscale x 8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i32 = sext <vscale x 8 x i16> undef to <vscale x 8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i16_nxv8i64 = sext <vscale x 8 x i16> undef to <vscale x 8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i64 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i8 = sext <vscale x 8 x i1> undef to <vscale x 8 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i16 = sext <vscale x 8 x i1> undef to <vscale x 8 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i32 = sext <vscale x 8 x i1> undef to <vscale x 8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i64 = sext <vscale x 8 x i1> undef to <vscale x 8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i8_nxv16i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16i32 = sext <vscale x 16 x i16> undef to <vscale x 16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i16_nxv16i64 = sext <vscale x 16 x i16> undef to <vscale x 16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_nxv16i64 = sext <vscale x 16 x i32> undef to <vscale x 16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i8 = sext <vscale x 16 x i1> undef to <vscale x 16 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i16 = sext <vscale x 16 x i1> undef to <vscale x 16 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i32 = sext <vscale x 16 x i1> undef to <vscale x 16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_nxv16i64 = sext <vscale x 16 x i1> undef to <vscale x 16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32i16 = sext <vscale x 32 x i8> undef to <vscale x 32 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i8_nxv32i32 = sext <vscale x 32 x i8> undef to <vscale x 32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i8_nxv32i64 = sext <vscale x 32 x i8> undef to <vscale x 32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_nxv32i32 = sext <vscale x 32 x i16> undef to <vscale x 32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i16_nxv32i64 = sext <vscale x 32 x i16> undef to <vscale x 32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i32_nxv32i64 = sext <vscale x 32 x i32> undef to <vscale x 32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i8 = sext <vscale x 32 x i1> undef to <vscale x 32 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i16 = sext <vscale x 32 x i1> undef to <vscale x 32 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_nxv32i32 = sext <vscale x 32 x i1> undef to <vscale x 32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_nxv32i64 = sext <vscale x 32 x i1> undef to <vscale x 32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64i16 = sext <vscale x 64 x i8> undef to <vscale x 64 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i8_nxv64i32 = sext <vscale x 64 x i8> undef to <vscale x 64 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_nxv8i16 = sext <vscale x 8 x i1> undef to <vscale x 8 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8i32 = sext <vscale x 8 x i1> undef to <vscale x 8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8i64 = sext <vscale x 8 x i1> undef to <vscale x 8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i8_nxv16i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i8_nxv16i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i32 = sext <vscale x 16 x i16> undef to <vscale x 16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i16_nxv16i64 = sext <vscale x 16 x i16> undef to <vscale x 16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i32_nxv16i64 = sext <vscale x 16 x i32> undef to <vscale x 16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_nxv16i8 = sext <vscale x 16 x i1> undef to <vscale x 16 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_nxv16i16 = sext <vscale x 16 x i1> undef to <vscale x 16 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16i32 = sext <vscale x 16 x i1> undef to <vscale x 16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16i64 = sext <vscale x 16 x i1> undef to <vscale x 16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i16 = sext <vscale x 32 x i8> undef to <vscale x 32 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8_nxv32i32 = sext <vscale x 32 x i8> undef to <vscale x 32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i8_nxv32i64 = sext <vscale x 32 x i8> undef to <vscale x 32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i16_nxv32i32 = sext <vscale x 32 x i16> undef to <vscale x 32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i16_nxv32i64 = sext <vscale x 32 x i16> undef to <vscale x 32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv32i32_nxv32i64 = sext <vscale x 32 x i32> undef to <vscale x 32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i1_nxv32i8 = sext <vscale x 32 x i1> undef to <vscale x 32 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv32i1_nxv32i16 = sext <vscale x 32 x i1> undef to <vscale x 32 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32i32 = sext <vscale x 32 x i1> undef to <vscale x 32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32i64 = sext <vscale x 32 x i1> undef to <vscale x 32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv64i8_nxv64i16 = sext <vscale x 64 x i8> undef to <vscale x 64 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv64i8_nxv64i32 = sext <vscale x 64 x i8> undef to <vscale x 64 x i32>
; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv64i8_nxv64i64 = sext <vscale x 64 x i8> undef to <vscale x 64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i16_nxv64i32 = sext <vscale x 64 x i16> undef to <vscale x 64 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64i16_nxv64i64 = sext <vscale x 64 x i16> undef to <vscale x 64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv64i32_nxv64i64 = sext <vscale x 64 x i32> undef to <vscale x 64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i1_nxv64i8 = sext <vscale x 64 x i1> undef to <vscale x 64 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv64i1_nxv64i16 = sext <vscale x 64 x i1> undef to <vscale x 64 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i1_nxv64i32 = sext <vscale x 64 x i1> undef to <vscale x 64 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv64i16_nxv64i32 = sext <vscale x 64 x i16> undef to <vscale x 64 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %nxv64i16_nxv64i64 = sext <vscale x 64 x i16> undef to <vscale x 64 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %nxv64i32_nxv64i64 = sext <vscale x 64 x i32> undef to <vscale x 64 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv64i1_nxv64i8 = sext <vscale x 64 x i1> undef to <vscale x 64 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv64i1_nxv64i16 = sext <vscale x 64 x i1> undef to <vscale x 64 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64i32 = sext <vscale x 64 x i1> undef to <vscale x 64 x i32>
; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv64i1_nxv64i64 = sext <vscale x 64 x i1> undef to <vscale x 64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv128i8_nxv128i16 = sext <vscale x 128 x i8> undef to <vscale x 128 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv128i8_nxv128i32 = sext <vscale x 128 x i8> undef to <vscale x 128 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv128i8_nxv128i16 = sext <vscale x 128 x i8> undef to <vscale x 128 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %nxv128i8_nxv128i32 = sext <vscale x 128 x i8> undef to <vscale x 128 x i32>
; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i8_nxv128i128 = sext <vscale x 128 x i8> undef to <vscale x 128 x i128>
-; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv128i16_nxv128i32 = sext <vscale x 128 x i16> undef to <vscale x 128 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %nxv128i16_nxv128i32 = sext <vscale x 128 x i16> undef to <vscale x 128 x i32>
; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i16_nxv128i128 = sext <vscale x 128 x i16> undef to <vscale x 128 x i128>
; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i32_nxv128i128 = sext <vscale x 128 x i32> undef to <vscale x 128 x i128>
-; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv128i1_nxv128i8 = sext <vscale x 128 x i1> undef to <vscale x 128 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv128i1_nxv128i16 = sext <vscale x 128 x i1> undef to <vscale x 128 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv128i1_nxv128i32 = sext <vscale x 128 x i1> undef to <vscale x 128 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %nxv128i1_nxv128i8 = sext <vscale x 128 x i1> undef to <vscale x 128 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %nxv128i1_nxv128i16 = sext <vscale x 128 x i1> undef to <vscale x 128 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %nxv128i1_nxv128i32 = sext <vscale x 128 x i1> undef to <vscale x 128 x i32>
; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i1_nxv128i128 = sext <vscale x 128 x i1> undef to <vscale x 128 x i128>
; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
@@ -179,74 +179,74 @@ define void @sext() {
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i64 = sext <2 x i1> undef to <2 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i16 = sext <4 x i8> undef to <4 x i16>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i32 = sext <4 x i8> undef to <4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i64 = sext <4 x i8> undef to <4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i64 = sext <4 x i8> undef to <4 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i32 = sext <4 x i16> undef to <4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i64 = sext <4 x i16> undef to <4 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4i64 = sext <4 x i32> undef to <4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i64 = sext <4 x i16> undef to <4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i64 = sext <4 x i32> undef to <4 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i8 = sext <4 x i1> undef to <4 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i16 = sext <4 x i1> undef to <4 x i16>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i32 = sext <4 x i1> undef to <4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i64 = sext <4 x i1> undef to <4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4i64 = sext <4 x i1> undef to <4 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i16 = sext <8 x i8> undef to <8 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i32 = sext <8 x i8> undef to <8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i64 = sext <8 x i8> undef to <8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i32 = sext <8 x i16> undef to <8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i64 = sext <8 x i16> undef to <8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8i64 = sext <8 x i32> undef to <8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i32 = sext <8 x i8> undef to <8 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_v8i64 = sext <8 x i8> undef to <8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i32 = sext <8 x i16> undef to <8 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_v8i64 = sext <8 x i16> undef to <8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i64 = sext <8 x i32> undef to <8 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i8 = sext <8 x i1> undef to <8 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i16 = sext <8 x i1> undef to <8 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i32 = sext <8 x i1> undef to <8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i64 = sext <8 x i1> undef to <8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i16 = sext <16 x i8> undef to <16 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i32 = sext <16 x i8> undef to <16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i64 = sext <16 x i8> undef to <16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i32 = sext <16 x i16> undef to <16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i64 = sext <16 x i16> undef to <16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16i64 = sext <16 x i32> undef to <16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8i32 = sext <8 x i1> undef to <8 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8i64 = sext <8 x i1> undef to <8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16i16 = sext <16 x i8> undef to <16 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_v16i32 = sext <16 x i8> undef to <16 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_v16i64 = sext <16 x i8> undef to <16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_v16i32 = sext <16 x i16> undef to <16 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_v16i64 = sext <16 x i16> undef to <16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32_v16i64 = sext <16 x i32> undef to <16 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i8 = sext <16 x i1> undef to <16 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i16 = sext <16 x i1> undef to <16 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i32 = sext <16 x i1> undef to <16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i64 = sext <16 x i1> undef to <16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i16 = sext <32 x i8> undef to <32 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i32 = sext <32 x i8> undef to <32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_v32i64 = sext <32 x i8> undef to <32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32i32 = sext <32 x i16> undef to <32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_v32i64 = sext <32 x i16> undef to <32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i64 = sext <32 x i32> undef to <32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i8 = sext <32 x i1> undef to <32 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i16 = sext <32 x i1> undef to <32 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i32 = sext <32 x i1> undef to <32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_v32i64 = sext <32 x i1> undef to <32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64i16 = sext <64 x i8> undef to <64 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_v64i32 = sext <64 x i8> undef to <64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i8_v64i64 = sext <64 x i8> undef to <64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i16_v64i32 = sext <64 x i16> undef to <64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i16_v64i64 = sext <64 x i16> undef to <64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i32_v64i64 = sext <64 x i32> undef to <64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i8 = sext <64 x i1> undef to <64 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i16 = sext <64 x i1> undef to <64 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i1_v64i32 = sext <64 x i1> undef to <64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i1_v64i64 = sext <64 x i1> undef to <64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128i16 = sext <128 x i8> undef to <128 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i8_v128i32 = sext <128 x i8> undef to <128 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128i8_v128i64 = sext <128 x i8> undef to <128 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i16_v128i32 = sext <128 x i16> undef to <128 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v128i16_v128i64 = sext <128 x i16> undef to <128 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i32_v128i64 = sext <128 x i32> undef to <128 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i1_v128i8 = sext <128 x i1> undef to <128 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v128i1_v128i16 = sext <128 x i1> undef to <128 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i1_v128i32 = sext <128 x i1> undef to <128 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i1_v128i64 = sext <128 x i1> undef to <128 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i8_v256i16 = sext <256 x i8> undef to <256 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v256i8_v256i32 = sext <256 x i8> undef to <256 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v256i8_v256i64 = sext <256 x i8> undef to <256 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v256i16_v256i32 = sext <256 x i16> undef to <256 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v256i16_v256i64 = sext <256 x i16> undef to <256 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v256i32_v256i64 = sext <256 x i32> undef to <256 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v256i1_v256i8 = sext <256 x i1> undef to <256 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v256i1_v256i16 = sext <256 x i1> undef to <256 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i1_v256i32 = sext <256 x i1> undef to <256 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v256i1_v256i64 = sext <256 x i1> undef to <256 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_v16i16 = sext <16 x i1> undef to <16 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16i32 = sext <16 x i1> undef to <16 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16i64 = sext <16 x i1> undef to <16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_v32i16 = sext <32 x i8> undef to <32 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_v32i32 = sext <32 x i8> undef to <32 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i8_v32i64 = sext <32 x i8> undef to <32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16_v32i32 = sext <32 x i16> undef to <32 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i16_v32i64 = sext <32 x i16> undef to <32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i32_v32i64 = sext <32 x i32> undef to <32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_v32i8 = sext <32 x i1> undef to <32 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_v32i16 = sext <32 x i1> undef to <32 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32i32 = sext <32 x i1> undef to <32 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32i64 = sext <32 x i1> undef to <32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i16 = sext <64 x i8> undef to <64 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i8_v64i32 = sext <64 x i8> undef to <64 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i8_v64i64 = sext <64 x i8> undef to <64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i16_v64i32 = sext <64 x i16> undef to <64 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i16_v64i64 = sext <64 x i16> undef to <64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v64i32_v64i64 = sext <64 x i32> undef to <64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i1_v64i8 = sext <64 x i1> undef to <64 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i1_v64i16 = sext <64 x i1> undef to <64 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64i32 = sext <64 x i1> undef to <64 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64i64 = sext <64 x i1> undef to <64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v128i8_v128i16 = sext <128 x i8> undef to <128 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v128i8_v128i32 = sext <128 x i8> undef to <128 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %v128i8_v128i64 = sext <128 x i8> undef to <128 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v128i16_v128i32 = sext <128 x i16> undef to <128 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v128i16_v128i64 = sext <128 x i16> undef to <128 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v128i32_v128i64 = sext <128 x i32> undef to <128 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v128i1_v128i8 = sext <128 x i1> undef to <128 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v128i1_v128i16 = sext <128 x i1> undef to <128 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128i32 = sext <128 x i1> undef to <128 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128i64 = sext <128 x i1> undef to <128 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v256i8_v256i16 = sext <256 x i8> undef to <256 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v256i8_v256i32 = sext <256 x i8> undef to <256 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %v256i8_v256i64 = sext <256 x i8> undef to <256 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v256i16_v256i32 = sext <256 x i16> undef to <256 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v256i16_v256i64 = sext <256 x i16> undef to <256 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v256i32_v256i64 = sext <256 x i32> undef to <256 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v256i1_v256i8 = sext <256 x i1> undef to <256 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v256i1_v256i16 = sext <256 x i1> undef to <256 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v256i1_v256i32 = sext <256 x i1> undef to <256 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %v256i1_v256i64 = sext <256 x i1> undef to <256 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i16 = sext <vscale x 1 x i8> undef to <vscale x 1 x i16>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i32 = sext <vscale x 1 x i8> undef to <vscale x 1 x i32>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i64 = sext <vscale x 1 x i8> undef to <vscale x 1 x i64>
@@ -259,73 +259,73 @@ define void @sext() {
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i64 = sext <vscale x 1 x i1> undef to <vscale x 1 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i16 = sext <vscale x 2 x i8> undef to <vscale x 2 x i16>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i32 = sext <vscale x 2 x i8> undef to <vscale x 2 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i64 = sext <vscale x 2 x i8> undef to <vscale x 2 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i64 = sext <vscale x 2 x i8> undef to <vscale x 2 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i32 = sext <vscale x 2 x i16> undef to <vscale x 2 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i64 = sext <vscale x 2 x i16> undef to <vscale x 2 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2i64 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i64 = sext <vscale x 2 x i16> undef to <vscale x 2 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i64 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i8 = sext <vscale x 2 x i1> undef to <vscale x 2 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i16 = sext <vscale x 2 x i1> undef to <vscale x 2 x i16>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i32 = sext <vscale x 2 x i1> undef to <vscale x 2 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i64 = sext <vscale x 2 x i1> undef to <vscale x 2 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2i64 = sext <vscale x 2 x i1> undef to <vscale x 2 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i16 = sext <vscale x 4 x i8> undef to <vscale x 4 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i32 = sext <vscale x 4 x i8> undef to <vscale x 4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i64 = sext <vscale x 4 x i8> undef to <vscale x 4 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i32 = sext <vscale x 4 x i16> undef to <vscale x 4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i64 = sext <vscale x 4 x i16> undef to <vscale x 4 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4i64 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i32 = sext <vscale x 4 x i8> undef to <vscale x 4 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i8_nxv4i64 = sext <vscale x 4 x i8> undef to <vscale x 4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4i32 = sext <vscale x 4 x i16> undef to <vscale x 4 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i16_nxv4i64 = sext <vscale x 4 x i16> undef to <vscale x 4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i64 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i8 = sext <vscale x 4 x i1> undef to <vscale x 4 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i16 = sext <vscale x 4 x i1> undef to <vscale x 4 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i32 = sext <vscale x 4 x i1> undef to <vscale x 4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i64 = sext <vscale x 4 x i1> undef to <vscale x 4 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i16 = sext <vscale x 8 x i8> undef to <vscale x 8 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i32 = sext <vscale x 8 x i8> undef to <vscale x 8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i64 = sext <vscale x 8 x i8> undef to <vscale x 8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i32 = sext <vscale x 8 x i16> undef to <vscale x 8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i64 = sext <vscale x 8 x i16> undef to <vscale x 8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8i64 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4i32 = sext <vscale x 4 x i1> undef to <vscale x 4 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4i64 = sext <vscale x 4 x i1> undef to <vscale x 4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i16 = sext <vscale x 8 x i8> undef to <vscale x 8 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i8_nxv8i32 = sext <vscale x 8 x i8> undef to <vscale x 8 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8_nxv8i64 = sext <vscale x 8 x i8> undef to <vscale x 8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i32 = sext <vscale x 8 x i16> undef to <vscale x 8 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i16_nxv8i64 = sext <vscale x 8 x i16> undef to <vscale x 8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i64 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i8 = sext <vscale x 8 x i1> undef to <vscale x 8 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i16 = sext <vscale x 8 x i1> undef to <vscale x 8 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i32 = sext <vscale x 8 x i1> undef to <vscale x 8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i64 = sext <vscale x 8 x i1> undef to <vscale x 8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i8_nxv16i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16i32 = sext <vscale x 16 x i16> undef to <vscale x 16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i16_nxv16i64 = sext <vscale x 16 x i16> undef to <vscale x 16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_nxv16i64 = sext <vscale x 16 x i32> undef to <vscale x 16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i8 = sext <vscale x 16 x i1> undef to <vscale x 16 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i16 = sext <vscale x 16 x i1> undef to <vscale x 16 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i32 = sext <vscale x 16 x i1> undef to <vscale x 16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_nxv16i64 = sext <vscale x 16 x i1> undef to <vscale x 16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32i16 = sext <vscale x 32 x i8> undef to <vscale x 32 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i8_nxv32i32 = sext <vscale x 32 x i8> undef to <vscale x 32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i8_nxv32i64 = sext <vscale x 32 x i8> undef to <vscale x 32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_nxv32i32 = sext <vscale x 32 x i16> undef to <vscale x 32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i16_nxv32i64 = sext <vscale x 32 x i16> undef to <vscale x 32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i32_nxv32i64 = sext <vscale x 32 x i32> undef to <vscale x 32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i8 = sext <vscale x 32 x i1> undef to <vscale x 32 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i16 = sext <vscale x 32 x i1> undef to <vscale x 32 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_nxv32i32 = sext <vscale x 32 x i1> undef to <vscale x 32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_nxv32i64 = sext <vscale x 32 x i1> undef to <vscale x 32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64i16 = sext <vscale x 64 x i8> undef to <vscale x 64 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i8_nxv64i32 = sext <vscale x 64 x i8> undef to <vscale x 64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64i8_nxv64i64 = sext <vscale x 64 x i8> undef to <vscale x 64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i16_nxv64i32 = sext <vscale x 64 x i16> undef to <vscale x 64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv64i16_nxv64i64 = sext <vscale x 64 x i16> undef to <vscale x 64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64i32_nxv64i64 = sext <vscale x 64 x i32> undef to <vscale x 64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i1_nxv64i8 = sext <vscale x 64 x i1> undef to <vscale x 64 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv64i1_nxv64i16 = sext <vscale x 64 x i1> undef to <vscale x 64 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i1_nxv64i32 = sext <vscale x 64 x i1> undef to <vscale x 64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i1_nxv64i64 = sext <vscale x 64 x i1> undef to <vscale x 64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv128i8_nxv128i16 = sext <vscale x 128 x i8> undef to <vscale x 128 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv128i8_nxv128i32 = sext <vscale x 128 x i8> undef to <vscale x 128 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_nxv8i16 = sext <vscale x 8 x i1> undef to <vscale x 8 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8i32 = sext <vscale x 8 x i1> undef to <vscale x 8 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8i64 = sext <vscale x 8 x i1> undef to <vscale x 8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i8_nxv16i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i8_nxv16i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i32 = sext <vscale x 16 x i16> undef to <vscale x 16 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i16_nxv16i64 = sext <vscale x 16 x i16> undef to <vscale x 16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i32_nxv16i64 = sext <vscale x 16 x i32> undef to <vscale x 16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_nxv16i8 = sext <vscale x 16 x i1> undef to <vscale x 16 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_nxv16i16 = sext <vscale x 16 x i1> undef to <vscale x 16 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16i32 = sext <vscale x 16 x i1> undef to <vscale x 16 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16i64 = sext <vscale x 16 x i1> undef to <vscale x 16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i16 = sext <vscale x 32 x i8> undef to <vscale x 32 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8_nxv32i32 = sext <vscale x 32 x i8> undef to <vscale x 32 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i8_nxv32i64 = sext <vscale x 32 x i8> undef to <vscale x 32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i16_nxv32i32 = sext <vscale x 32 x i16> undef to <vscale x 32 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i16_nxv32i64 = sext <vscale x 32 x i16> undef to <vscale x 32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv32i32_nxv32i64 = sext <vscale x 32 x i32> undef to <vscale x 32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i1_nxv32i8 = sext <vscale x 32 x i1> undef to <vscale x 32 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv32i1_nxv32i16 = sext <vscale x 32 x i1> undef to <vscale x 32 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32i32 = sext <vscale x 32 x i1> undef to <vscale x 32 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32i64 = sext <vscale x 32 x i1> undef to <vscale x 32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv64i8_nxv64i16 = sext <vscale x 64 x i8> undef to <vscale x 64 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv64i8_nxv64i32 = sext <vscale x 64 x i8> undef to <vscale x 64 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %nxv64i8_nxv64i64 = sext <vscale x 64 x i8> undef to <vscale x 64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv64i16_nxv64i32 = sext <vscale x 64 x i16> undef to <vscale x 64 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %nxv64i16_nxv64i64 = sext <vscale x 64 x i16> undef to <vscale x 64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %nxv64i32_nxv64i64 = sext <vscale x 64 x i32> undef to <vscale x 64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv64i1_nxv64i8 = sext <vscale x 64 x i1> undef to <vscale x 64 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv64i1_nxv64i16 = sext <vscale x 64 x i1> undef to <vscale x 64 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64i32 = sext <vscale x 64 x i1> undef to <vscale x 64 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %nxv64i1_nxv64i64 = sext <vscale x 64 x i1> undef to <vscale x 64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv128i8_nxv128i16 = sext <vscale x 128 x i8> undef to <vscale x 128 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %nxv128i8_nxv128i32 = sext <vscale x 128 x i8> undef to <vscale x 128 x i32>
; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i8_nxv128i128 = sext <vscale x 128 x i8> undef to <vscale x 128 x i128>
-; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv128i16_nxv128i32 = sext <vscale x 128 x i16> undef to <vscale x 128 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %nxv128i16_nxv128i32 = sext <vscale x 128 x i16> undef to <vscale x 128 x i32>
; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i16_nxv128i128 = sext <vscale x 128 x i16> undef to <vscale x 128 x i128>
; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i32_nxv128i128 = sext <vscale x 128 x i32> undef to <vscale x 128 x i128>
-; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv128i1_nxv128i8 = sext <vscale x 128 x i1> undef to <vscale x 128 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv128i1_nxv128i16 = sext <vscale x 128 x i1> undef to <vscale x 128 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv128i1_nxv128i32 = sext <vscale x 128 x i1> undef to <vscale x 128 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %nxv128i1_nxv128i8 = sext <vscale x 128 x i1> undef to <vscale x 128 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %nxv128i1_nxv128i16 = sext <vscale x 128 x i1> undef to <vscale x 128 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %nxv128i1_nxv128i32 = sext <vscale x 128 x i1> undef to <vscale x 128 x i32>
; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i1_nxv128i128 = sext <vscale x 128 x i1> undef to <vscale x 128 x i128>
; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
@@ -522,74 +522,74 @@ define void @zext() {
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i64 = zext <2 x i1> undef to <2 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i16 = zext <4 x i8> undef to <4 x i16>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i32 = zext <4 x i8> undef to <4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i64 = zext <4 x i8> undef to <4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i64 = zext <4 x i8> undef to <4 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i32 = zext <4 x i16> undef to <4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i64 = zext <4 x i16> undef to <4 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4i64 = zext <4 x i32> undef to <4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i64 = zext <4 x i16> undef to <4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i64 = zext <4 x i32> undef to <4 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i8 = zext <4 x i1> undef to <4 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i16 = zext <4 x i1> undef to <4 x i16>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i32 = zext <4 x i1> undef to <4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i64 = zext <4 x i1> undef to <4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4i64 = zext <4 x i1> undef to <4 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i16 = zext <8 x i8> undef to <8 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i32 = zext <8 x i8> undef to <8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i64 = zext <8 x i8> undef to <8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i32 = zext <8 x i16> undef to <8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i64 = zext <8 x i16> undef to <8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8i64 = zext <8 x i32> undef to <8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i32 = zext <8 x i8> undef to <8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_v8i64 = zext <8 x i8> undef to <8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i32 = zext <8 x i16> undef to <8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_v8i64 = zext <8 x i16> undef to <8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i64 = zext <8 x i32> undef to <8 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i8 = zext <8 x i1> undef to <8 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i16 = zext <8 x i1> undef to <8 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i32 = zext <8 x i1> undef to <8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i64 = zext <8 x i1> undef to <8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i16 = zext <16 x i8> undef to <16 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i32 = zext <16 x i8> undef to <16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i64 = zext <16 x i8> undef to <16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i32 = zext <16 x i16> undef to <16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i64 = zext <16 x i16> undef to <16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16i64 = zext <16 x i32> undef to <16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8i32 = zext <8 x i1> undef to <8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8i64 = zext <8 x i1> undef to <8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16i16 = zext <16 x i8> undef to <16 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_v16i32 = zext <16 x i8> undef to <16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_v16i64 = zext <16 x i8> undef to <16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_v16i32 = zext <16 x i16> undef to <16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_v16i64 = zext <16 x i16> undef to <16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32_v16i64 = zext <16 x i32> undef to <16 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i8 = zext <16 x i1> undef to <16 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i16 = zext <16 x i1> undef to <16 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i32 = zext <16 x i1> undef to <16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i64 = zext <16 x i1> undef to <16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i16 = zext <32 x i8> undef to <32 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i32 = zext <32 x i8> undef to <32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_v32i64 = zext <32 x i8> undef to <32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32i32 = zext <32 x i16> undef to <32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_v32i64 = zext <32 x i16> undef to <32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i64 = zext <32 x i32> undef to <32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i8 = zext <32 x i1> undef to <32 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i16 = zext <32 x i1> undef to <32 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i32 = zext <32 x i1> undef to <32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_v32i64 = zext <32 x i1> undef to <32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64i16 = zext <64 x i8> undef to <64 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_v64i32 = zext <64 x i8> undef to <64 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i8_v64i64 = zext <64 x i8> undef to <64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i16_v64i32 = zext <64 x i16> undef to <64 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i16_v64i64 = zext <64 x i16> undef to <64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i32_v64i64 = zext <64 x i32> undef to <64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i8 = zext <64 x i1> undef to <64 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i16 = zext <64 x i1> undef to <64 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i1_v64i32 = zext <64 x i1> undef to <64 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i1_v64i64 = zext <64 x i1> undef to <64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128i16 = zext <128 x i8> undef to <128 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i8_v128i32 = zext <128 x i8> undef to <128 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128i8_v128i64 = zext <128 x i8> undef to <128 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i16_v128i32 = zext <128 x i16> undef to <128 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v128i16_v128i64 = zext <128 x i16> undef to <128 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i32_v128i64 = zext <128 x i32> undef to <128 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i1_v128i8 = zext <128 x i1> undef to <128 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v128i1_v128i16 = zext <128 x i1> undef to <128 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i1_v128i32 = zext <128 x i1> undef to <128 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i1_v128i64 = zext <128 x i1> undef to <128 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i8_v256i16 = zext <256 x i8> undef to <256 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v256i8_v256i32 = zext <256 x i8> undef to <256 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v256i8_v256i64 = zext <256 x i8> undef to <256 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v256i16_v256i32 = zext <256 x i16> undef to <256 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v256i16_v256i64 = zext <256 x i16> undef to <256 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v256i32_v256i64 = zext <256 x i32> undef to <256 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v256i1_v256i8 = zext <256 x i1> undef to <256 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v256i1_v256i16 = zext <256 x i1> undef to <256 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i1_v256i32 = zext <256 x i1> undef to <256 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v256i1_v256i64 = zext <256 x i1> undef to <256 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_v16i16 = zext <16 x i1> undef to <16 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16i32 = zext <16 x i1> undef to <16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16i64 = zext <16 x i1> undef to <16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_v32i16 = zext <32 x i8> undef to <32 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_v32i32 = zext <32 x i8> undef to <32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i8_v32i64 = zext <32 x i8> undef to <32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16_v32i32 = zext <32 x i16> undef to <32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i16_v32i64 = zext <32 x i16> undef to <32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i32_v32i64 = zext <32 x i32> undef to <32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_v32i8 = zext <32 x i1> undef to <32 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_v32i16 = zext <32 x i1> undef to <32 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32i32 = zext <32 x i1> undef to <32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32i64 = zext <32 x i1> undef to <32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i16 = zext <64 x i8> undef to <64 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i8_v64i32 = zext <64 x i8> undef to <64 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i8_v64i64 = zext <64 x i8> undef to <64 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i16_v64i32 = zext <64 x i16> undef to <64 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i16_v64i64 = zext <64 x i16> undef to <64 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v64i32_v64i64 = zext <64 x i32> undef to <64 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i1_v64i8 = zext <64 x i1> undef to <64 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i1_v64i16 = zext <64 x i1> undef to <64 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64i32 = zext <64 x i1> undef to <64 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64i64 = zext <64 x i1> undef to <64 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v128i8_v128i16 = zext <128 x i8> undef to <128 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v128i8_v128i32 = zext <128 x i8> undef to <128 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %v128i8_v128i64 = zext <128 x i8> undef to <128 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v128i16_v128i32 = zext <128 x i16> undef to <128 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v128i16_v128i64 = zext <128 x i16> undef to <128 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v128i32_v128i64 = zext <128 x i32> undef to <128 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v128i1_v128i8 = zext <128 x i1> undef to <128 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v128i1_v128i16 = zext <128 x i1> undef to <128 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128i32 = zext <128 x i1> undef to <128 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128i64 = zext <128 x i1> undef to <128 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v256i8_v256i16 = zext <256 x i8> undef to <256 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v256i8_v256i32 = zext <256 x i8> undef to <256 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %v256i8_v256i64 = zext <256 x i8> undef to <256 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v256i16_v256i32 = zext <256 x i16> undef to <256 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v256i16_v256i64 = zext <256 x i16> undef to <256 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v256i32_v256i64 = zext <256 x i32> undef to <256 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v256i1_v256i8 = zext <256 x i1> undef to <256 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v256i1_v256i16 = zext <256 x i1> undef to <256 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v256i1_v256i32 = zext <256 x i1> undef to <256 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %v256i1_v256i64 = zext <256 x i1> undef to <256 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i16 = zext <vscale x 1 x i8> undef to <vscale x 1 x i16>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i32 = zext <vscale x 1 x i8> undef to <vscale x 1 x i32>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i64 = zext <vscale x 1 x i8> undef to <vscale x 1 x i64>
@@ -602,73 +602,73 @@ define void @zext() {
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i64 = zext <vscale x 1 x i1> undef to <vscale x 1 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i16 = zext <vscale x 2 x i8> undef to <vscale x 2 x i16>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i32 = zext <vscale x 2 x i8> undef to <vscale x 2 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i64 = zext <vscale x 2 x i8> undef to <vscale x 2 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i64 = zext <vscale x 2 x i8> undef to <vscale x 2 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i32 = zext <vscale x 2 x i16> undef to <vscale x 2 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i64 = zext <vscale x 2 x i16> undef to <vscale x 2 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2i64 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i64 = zext <vscale x 2 x i16> undef to <vscale x 2 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i64 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i8 = zext <vscale x 2 x i1> undef to <vscale x 2 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i16 = zext <vscale x 2 x i1> undef to <vscale x 2 x i16>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i32 = zext <vscale x 2 x i1> undef to <vscale x 2 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i64 = zext <vscale x 2 x i1> undef to <vscale x 2 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2i64 = zext <vscale x 2 x i1> undef to <vscale x 2 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i16 = zext <vscale x 4 x i8> undef to <vscale x 4 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i32 = zext <vscale x 4 x i8> undef to <vscale x 4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i64 = zext <vscale x 4 x i8> undef to <vscale x 4 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i32 = zext <vscale x 4 x i16> undef to <vscale x 4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i64 = zext <vscale x 4 x i16> undef to <vscale x 4 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4i64 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i32 = zext <vscale x 4 x i8> undef to <vscale x 4 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i8_nxv4i64 = zext <vscale x 4 x i8> undef to <vscale x 4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4i32 = zext <vscale x 4 x i16> undef to <vscale x 4 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i16_nxv4i64 = zext <vscale x 4 x i16> undef to <vscale x 4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i64 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i8 = zext <vscale x 4 x i1> undef to <vscale x 4 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i16 = zext <vscale x 4 x i1> undef to <vscale x 4 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i32 = zext <vscale x 4 x i1> undef to <vscale x 4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i64 = zext <vscale x 4 x i1> undef to <vscale x 4 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i16 = zext <vscale x 8 x i8> undef to <vscale x 8 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i32 = zext <vscale x 8 x i8> undef to <vscale x 8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i64 = zext <vscale x 8 x i8> undef to <vscale x 8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i32 = zext <vscale x 8 x i16> undef to <vscale x 8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i64 = zext <vscale x 8 x i16> undef to <vscale x 8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8i64 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4i32 = zext <vscale x 4 x i1> undef to <vscale x 4 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4i64 = zext <vscale x 4 x i1> undef to <vscale x 4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i16 = zext <vscale x 8 x i8> undef to <vscale x 8 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i8_nxv8i32 = zext <vscale x 8 x i8> undef to <vscale x 8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8_nxv8i64 = zext <vscale x 8 x i8> undef to <vscale x 8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i32 = zext <vscale x 8 x i16> undef to <vscale x 8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i16_nxv8i64 = zext <vscale x 8 x i16> undef to <vscale x 8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i64 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i8 = zext <vscale x 8 x i1> undef to <vscale x 8 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i16 = zext <vscale x 8 x i1> undef to <vscale x 8 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i32 = zext <vscale x 8 x i1> undef to <vscale x 8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i64 = zext <vscale x 8 x i1> undef to <vscale x 8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i8_nxv16i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16i32 = zext <vscale x 16 x i16> undef to <vscale x 16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i16_nxv16i64 = zext <vscale x 16 x i16> undef to <vscale x 16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_nxv16i64 = zext <vscale x 16 x i32> undef to <vscale x 16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i8 = zext <vscale x 16 x i1> undef to <vscale x 16 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i16 = zext <vscale x 16 x i1> undef to <vscale x 16 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i32 = zext <vscale x 16 x i1> undef to <vscale x 16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_nxv16i64 = zext <vscale x 16 x i1> undef to <vscale x 16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32i16 = zext <vscale x 32 x i8> undef to <vscale x 32 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i8_nxv32i32 = zext <vscale x 32 x i8> undef to <vscale x 32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i8_nxv32i64 = zext <vscale x 32 x i8> undef to <vscale x 32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_nxv32i32 = zext <vscale x 32 x i16> undef to <vscale x 32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i16_nxv32i64 = zext <vscale x 32 x i16> undef to <vscale x 32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i32_nxv32i64 = zext <vscale x 32 x i32> undef to <vscale x 32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i8 = zext <vscale x 32 x i1> undef to <vscale x 32 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i16 = zext <vscale x 32 x i1> undef to <vscale x 32 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_nxv32i32 = zext <vscale x 32 x i1> undef to <vscale x 32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_nxv32i64 = zext <vscale x 32 x i1> undef to <vscale x 32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64i16 = zext <vscale x 64 x i8> undef to <vscale x 64 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i8_nxv64i32 = zext <vscale x 64 x i8> undef to <vscale x 64 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_nxv8i16 = zext <vscale x 8 x i1> undef to <vscale x 8 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8i32 = zext <vscale x 8 x i1> undef to <vscale x 8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8i64 = zext <vscale x 8 x i1> undef to <vscale x 8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i8_nxv16i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i8_nxv16i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i32 = zext <vscale x 16 x i16> undef to <vscale x 16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i16_nxv16i64 = zext <vscale x 16 x i16> undef to <vscale x 16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i32_nxv16i64 = zext <vscale x 16 x i32> undef to <vscale x 16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_nxv16i8 = zext <vscale x 16 x i1> undef to <vscale x 16 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_nxv16i16 = zext <vscale x 16 x i1> undef to <vscale x 16 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16i32 = zext <vscale x 16 x i1> undef to <vscale x 16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16i64 = zext <vscale x 16 x i1> undef to <vscale x 16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i16 = zext <vscale x 32 x i8> undef to <vscale x 32 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8_nxv32i32 = zext <vscale x 32 x i8> undef to <vscale x 32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i8_nxv32i64 = zext <vscale x 32 x i8> undef to <vscale x 32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i16_nxv32i32 = zext <vscale x 32 x i16> undef to <vscale x 32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i16_nxv32i64 = zext <vscale x 32 x i16> undef to <vscale x 32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv32i32_nxv32i64 = zext <vscale x 32 x i32> undef to <vscale x 32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i1_nxv32i8 = zext <vscale x 32 x i1> undef to <vscale x 32 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv32i1_nxv32i16 = zext <vscale x 32 x i1> undef to <vscale x 32 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32i32 = zext <vscale x 32 x i1> undef to <vscale x 32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32i64 = zext <vscale x 32 x i1> undef to <vscale x 32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv64i8_nxv64i16 = zext <vscale x 64 x i8> undef to <vscale x 64 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv64i8_nxv64i32 = zext <vscale x 64 x i8> undef to <vscale x 64 x i32>
; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv64i8_nxv64i64 = zext <vscale x 64 x i8> undef to <vscale x 64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i16_nxv64i32 = zext <vscale x 64 x i16> undef to <vscale x 64 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64i16_nxv64i64 = zext <vscale x 64 x i16> undef to <vscale x 64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv64i32_nxv64i64 = zext <vscale x 64 x i32> undef to <vscale x 64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i1_nxv64i8 = zext <vscale x 64 x i1> undef to <vscale x 64 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv64i1_nxv64i16 = zext <vscale x 64 x i1> undef to <vscale x 64 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i1_nxv64i32 = zext <vscale x 64 x i1> undef to <vscale x 64 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv64i16_nxv64i32 = zext <vscale x 64 x i16> undef to <vscale x 64 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %nxv64i16_nxv64i64 = zext <vscale x 64 x i16> undef to <vscale x 64 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %nxv64i32_nxv64i64 = zext <vscale x 64 x i32> undef to <vscale x 64 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv64i1_nxv64i8 = zext <vscale x 64 x i1> undef to <vscale x 64 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv64i1_nxv64i16 = zext <vscale x 64 x i1> undef to <vscale x 64 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64i32 = zext <vscale x 64 x i1> undef to <vscale x 64 x i32>
; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv64i1_nxv64i64 = zext <vscale x 64 x i1> undef to <vscale x 64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv128i8_nxv128i16 = zext <vscale x 128 x i8> undef to <vscale x 128 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv128i8_nxv128i32 = zext <vscale x 128 x i8> undef to <vscale x 128 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv128i8_nxv128i16 = zext <vscale x 128 x i8> undef to <vscale x 128 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %nxv128i8_nxv128i32 = zext <vscale x 128 x i8> undef to <vscale x 128 x i32>
; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i8_nxv128i128 = zext <vscale x 128 x i8> undef to <vscale x 128 x i128>
-; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv128i16_nxv128i32 = zext <vscale x 128 x i16> undef to <vscale x 128 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %nxv128i16_nxv128i32 = zext <vscale x 128 x i16> undef to <vscale x 128 x i32>
; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i16_nxv128i128 = zext <vscale x 128 x i16> undef to <vscale x 128 x i128>
; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i32_nxv128i128 = zext <vscale x 128 x i32> undef to <vscale x 128 x i128>
-; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv128i1_nxv128i8 = zext <vscale x 128 x i1> undef to <vscale x 128 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv128i1_nxv128i16 = zext <vscale x 128 x i1> undef to <vscale x 128 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv128i1_nxv128i32 = zext <vscale x 128 x i1> undef to <vscale x 128 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %nxv128i1_nxv128i8 = zext <vscale x 128 x i1> undef to <vscale x 128 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %nxv128i1_nxv128i16 = zext <vscale x 128 x i1> undef to <vscale x 128 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %nxv128i1_nxv128i32 = zext <vscale x 128 x i1> undef to <vscale x 128 x i32>
; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv128i1_nxv128i128 = zext <vscale x 128 x i1> undef to <vscale x 128 x i128>
; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
@@ -685,74 +685,74 @@ define void @zext() {
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i64 = zext <2 x i1> undef to <2 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i16 = zext <4 x i8> undef to <4 x i16>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i32 = zext <4 x i8> undef to <4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i64 = zext <4 x i8> undef to <4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i64 = zext <4 x i8> undef to <4 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i32 = zext <4 x i16> undef to <4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i64 = zext <4 x i16> undef to <4 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4i64 = zext <4 x i32> undef to <4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i64 = zext <4 x i16> undef to <4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i64 = zext <4 x i32> undef to <4 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i8 = zext <4 x i1> undef to <4 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i16 = zext <4 x i1> undef to <4 x i16>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i32 = zext <4 x i1> undef to <4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i64 = zext <4 x i1> undef to <4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4i64 = zext <4 x i1> undef to <4 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i16 = zext <8 x i8> undef to <8 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i32 = zext <8 x i8> undef to <8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i64 = zext <8 x i8> undef to <8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i32 = zext <8 x i16> undef to <8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i64 = zext <8 x i16> undef to <8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8i64 = zext <8 x i32> undef to <8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i32 = zext <8 x i8> undef to <8 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_v8i64 = zext <8 x i8> undef to <8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i32 = zext <8 x i16> undef to <8 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_v8i64 = zext <8 x i16> undef to <8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i64 = zext <8 x i32> undef to <8 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i8 = zext <8 x i1> undef to <8 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i16 = zext <8 x i1> undef to <8 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i32 = zext <8 x i1> undef to <8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i64 = zext <8 x i1> undef to <8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i16 = zext <16 x i8> undef to <16 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i32 = zext <16 x i8> undef to <16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16i64 = zext <16 x i8> undef to <16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i32 = zext <16 x i16> undef to <16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i64 = zext <16 x i16> undef to <16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16i64 = zext <16 x i32> undef to <16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8i32 = zext <8 x i1> undef to <8 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8i64 = zext <8 x i1> undef to <8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16i16 = zext <16 x i8> undef to <16 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_v16i32 = zext <16 x i8> undef to <16 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_v16i64 = zext <16 x i8> undef to <16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_v16i32 = zext <16 x i16> undef to <16 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_v16i64 = zext <16 x i16> undef to <16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32_v16i64 = zext <16 x i32> undef to <16 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i8 = zext <16 x i1> undef to <16 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i16 = zext <16 x i1> undef to <16 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i32 = zext <16 x i1> undef to <16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_v16i64 = zext <16 x i1> undef to <16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i16 = zext <32 x i8> undef to <32 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32i32 = zext <32 x i8> undef to <32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_v32i64 = zext <32 x i8> undef to <32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32i32 = zext <32 x i16> undef to <32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_v32i64 = zext <32 x i16> undef to <32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i64 = zext <32 x i32> undef to <32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i8 = zext <32 x i1> undef to <32 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i16 = zext <32 x i1> undef to <32 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_v32i32 = zext <32 x i1> undef to <32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_v32i64 = zext <32 x i1> undef to <32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64i16 = zext <64 x i8> undef to <64 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_v64i32 = zext <64 x i8> undef to <64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i8_v64i64 = zext <64 x i8> undef to <64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i16_v64i32 = zext <64 x i16> undef to <64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i16_v64i64 = zext <64 x i16> undef to <64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i32_v64i64 = zext <64 x i32> undef to <64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i8 = zext <64 x i1> undef to <64 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_v64i16 = zext <64 x i1> undef to <64 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i1_v64i32 = zext <64 x i1> undef to <64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i1_v64i64 = zext <64 x i1> undef to <64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128i16 = zext <128 x i8> undef to <128 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i8_v128i32 = zext <128 x i8> undef to <128 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128i8_v128i64 = zext <128 x i8> undef to <128 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i16_v128i32 = zext <128 x i16> undef to <128 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v128i16_v128i64 = zext <128 x i16> undef to <128 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i32_v128i64 = zext <128 x i32> undef to <128 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i1_v128i8 = zext <128 x i1> undef to <128 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v128i1_v128i16 = zext <128 x i1> undef to <128 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i1_v128i32 = zext <128 x i1> undef to <128 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i1_v128i64 = zext <128 x i1> undef to <128 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i8_v256i16 = zext <256 x i8> undef to <256 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v256i8_v256i32 = zext <256 x i8> undef to <256 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v256i8_v256i64 = zext <256 x i8> undef to <256 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v256i16_v256i32 = zext <256 x i16> undef to <256 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v256i16_v256i64 = zext <256 x i16> undef to <256 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v256i32_v256i64 = zext <256 x i32> undef to <256 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v256i1_v256i8 = zext <256 x i1> undef to <256 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v256i1_v256i16 = zext <256 x i1> undef to <256 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i1_v256i32 = zext <256 x i1> undef to <256 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v256i1_v256i64 = zext <256 x i1> undef to <256 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_v16i16 = zext <16 x i1> undef to <16 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16i32 = zext <16 x i1> undef to <16 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16i64 = zext <16 x i1> undef to <16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_v32i16 = zext <32 x i8> undef to <32 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_v32i32 = zext <32 x i8> undef to <32 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i8_v32i64 = zext <32 x i8> undef to <32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16_v32i32 = zext <32 x i16> undef to <32 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i16_v32i64 = zext <32 x i16> undef to <32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i32_v32i64 = zext <32 x i32> undef to <32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_v32i8 = zext <32 x i1> undef to <32 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_v32i16 = zext <32 x i1> undef to <32 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32i32 = zext <32 x i1> undef to <32 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32i64 = zext <32 x i1> undef to <32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i16 = zext <64 x i8> undef to <64 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i8_v64i32 = zext <64 x i8> undef to <64 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i8_v64i64 = zext <64 x i8> undef to <64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64i16_v64i32 = zext <64 x i16> undef to <64 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v64i16_v64i64 = zext <64 x i16> undef to <64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v64i32_v64i64 = zext <64 x i32> undef to <64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i1_v64i8 = zext <64 x i1> undef to <64 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i1_v64i16 = zext <64 x i1> undef to <64 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64i32 = zext <64 x i1> undef to <64 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64i64 = zext <64 x i1> undef to <64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v128i8_v128i16 = zext <128 x i8> undef to <128 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v128i8_v128i32 = zext <128 x i8> undef to <128 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %v128i8_v128i64 = zext <128 x i8> undef to <128 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v128i16_v128i32 = zext <128 x i16> undef to <128 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v128i16_v128i64 = zext <128 x i16> undef to <128 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v128i32_v128i64 = zext <128 x i32> undef to <128 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v128i1_v128i8 = zext <128 x i1> undef to <128 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v128i1_v128i16 = zext <128 x i1> undef to <128 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128i32 = zext <128 x i1> undef to <128 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128i64 = zext <128 x i1> undef to <128 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v256i8_v256i16 = zext <256 x i8> undef to <256 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v256i8_v256i32 = zext <256 x i8> undef to <256 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %v256i8_v256i64 = zext <256 x i8> undef to <256 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v256i16_v256i32 = zext <256 x i16> undef to <256 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v256i16_v256i64 = zext <256 x i16> undef to <256 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v256i32_v256i64 = zext <256 x i32> undef to <256 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v256i1_v256i8 = zext <256 x i1> undef to <256 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v256i1_v256i16 = zext <256 x i1> undef to <256 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v256i1_v256i32 = zext <256 x i1> undef to <256 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %v256i1_v256i64 = zext <256 x i1> undef to <256 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i16 = zext <vscale x 1 x i8> undef to <vscale x 1 x i16>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i32 = zext <vscale x 1 x i8> undef to <vscale x 1 x i32>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i64 = zext <vscale x 1 x i8> undef to <vscale x 1 x i64>
@@ -765,73 +765,73 @@ define void @zext() {
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i64 = zext <vscale x 1 x i1> undef to <vscale x 1 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i16 = zext <vscale x 2 x i8> undef to <vscale x 2 x i16>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i32 = zext <vscale x 2 x i8> undef to <vscale x 2 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i64 = zext <vscale x 2 x i8> undef to <vscale x 2 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i64 = zext <vscale x 2 x i8> undef to <vscale x 2 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i32 = zext <vscale x 2 x i16> undef to <vscale x 2 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i64 = zext <vscale x 2 x i16> undef to <vscale x 2 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2i64 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i64 = zext <vscale x 2 x i16> undef to <vscale x 2 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i64 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i8 = zext <vscale x 2 x i1> undef to <vscale x 2 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i16 = zext <vscale x 2 x i1> undef to <vscale x 2 x i16>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i32 = zext <vscale x 2 x i1> undef to <vscale x 2 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i64 = zext <vscale x 2 x i1> undef to <vscale x 2 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2i64 = zext <vscale x 2 x i1> undef to <vscale x 2 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i16 = zext <vscale x 4 x i8> undef to <vscale x 4 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i32 = zext <vscale x 4 x i8> undef to <vscale x 4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i64 = zext <vscale x 4 x i8> undef to <vscale x 4 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i32 = zext <vscale x 4 x i16> undef to <vscale x 4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i64 = zext <vscale x 4 x i16> undef to <vscale x 4 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4i64 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i32 = zext <vscale x 4 x i8> undef to <vscale x 4 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i8_nxv4i64 = zext <vscale x 4 x i8> undef to <vscale x 4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4i32 = zext <vscale x 4 x i16> undef to <vscale x 4 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i16_nxv4i64 = zext <vscale x 4 x i16> undef to <vscale x 4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i64 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i8 = zext <vscale x 4 x i1> undef to <vscale x 4 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i16 = zext <vscale x 4 x i1> undef to <vscale x 4 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i32 = zext <vscale x 4 x i1> undef to <vscale x 4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i64 = zext <vscale x 4 x i1> undef to <vscale x 4 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i16 = zext <vscale x 8 x i8> undef to <vscale x 8 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i32 = zext <vscale x 8 x i8> undef to <vscale x 8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8i64 = zext <vscale x 8 x i8> undef to <vscale x 8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i32 = zext <vscale x 8 x i16> undef to <vscale x 8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i64 = zext <vscale x 8 x i16> undef to <vscale x 8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8i64 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4i32 = zext <vscale x 4 x i1> undef to <vscale x 4 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4i64 = zext <vscale x 4 x i1> undef to <vscale x 4 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i16 = zext <vscale x 8 x i8> undef to <vscale x 8 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i8_nxv8i32 = zext <vscale x 8 x i8> undef to <vscale x 8 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8_nxv8i64 = zext <vscale x 8 x i8> undef to <vscale x 8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i32 = zext <vscale x 8 x i16> undef to <vscale x 8 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i16_nxv8i64 = zext <vscale x 8 x i16> undef to <vscale x 8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i64 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i8 = zext <vscale x 8 x i1> undef to <vscale x 8 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i16 = zext <vscale x 8 x i1> undef to <vscale x 8 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i32 = zext <vscale x 8 x i1> undef to <vscale x 8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i1_nxv8i64 = zext <vscale x 8 x i1> undef to <vscale x 8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i8_nxv16i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16i32 = zext <vscale x 16 x i16> undef to <vscale x 16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i16_nxv16i64 = zext <vscale x 16 x i16> undef to <vscale x 16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_nxv16i64 = zext <vscale x 16 x i32> undef to <vscale x 16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i8 = zext <vscale x 16 x i1> undef to <vscale x 16 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i16 = zext <vscale x 16 x i1> undef to <vscale x 16 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i1_nxv16i32 = zext <vscale x 16 x i1> undef to <vscale x 16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_nxv16i64 = zext <vscale x 16 x i1> undef to <vscale x 16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32i16 = zext <vscale x 32 x i8> undef to <vscale x 32 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i8_nxv32i32 = zext <vscale x 32 x i8> undef to <vscale x 32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i8_nxv32i64 = zext <vscale x 32 x i8> undef to <vscale x 32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_nxv32i32 = zext <vscale x 32 x i16> undef to <vscale x 32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i16_nxv32i64 = zext <vscale x 32 x i16> undef to <vscale x 32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i32_nxv32i64 = zext <vscale x 32 x i32> undef to <vscale x 32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i8 = zext <vscale x 32 x i1> undef to <vscale x 32 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i1_nxv32i16 = zext <vscale x 32 x i1> undef to <vscale x 32 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_nxv32i32 = zext <vscale x 32 x i1> undef to <vscale x 32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_nxv32i64 = zext <vscale x 32 x i1> undef to <vscale x 32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64i16 = zext <vscale x 64 x i8> undef to <vscale x 64 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i8_nxv64i32 = zext <vscale x 64 x i8> undef to <vscale x 64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64i8_nxv64i64 = zext <vscale x 64 x i8> undef to <vscale x 64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i16_nxv64i32 = zext <vscale x 64 x i16> undef to <vscale x 64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv64i16_nxv64i64 = zext <vscale x 64 x i16> undef to <vscale x 64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64i32_nxv64i64 = zext <vscale x 64 x i32> undef to <vscale x 64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i1_nxv64i8 = zext <vscale x 64 x i1> undef to <vscale x 64 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv64i1_nxv64i16 = zext <vscale x 64 x i1> undef to <vscale x 64 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i1_nxv64i32 = zext <vscale x 64 x i1> undef to <vscale x 64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i1_nxv64i64 = zext <vscale x 64 x i1> undef to <vscale x 64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv128i8_nxv128i16 = zext <vscale x 128 x i8> undef to <vscale x 128 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv128i8_nxv128i32 = zext <vscale x 128 x i8> undef to <vscale x 128 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_nxv8i16 = zext <vscale x 8 x i1> undef to <vscale x 8 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8i32 = zext <vscale x 8 x i1> undef to <vscale x 8 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8i64 = zext <vscale x 8 x i1> undef to <vscale x 8 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i8_nxv16i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i8_nxv16i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i32 = zext <vscale x 16 x i16> undef to <vscale x 16 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i16_nxv16i64 = zext <vscale x 16 x i16> undef to <vscale x 16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16i32_nxv16i64 = zext <vscale x 16 x i32> undef to <vscale x 16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_nxv16i8 = zext <vscale x 16 x i1> undef to <vscale x 16 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_nxv16i16 = zext <vscale x 16 x i1> undef to <vscale x 16 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16i32 = zext <vscale x 16 x i1> undef to <vscale x 16 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16i64 = zext <vscale x 16 x i1> undef to <vscale x 16 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i16 = zext <vscale x 32 x i8> undef to <vscale x 32 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8_nxv32i32 = zext <vscale x 32 x i8> undef to <vscale x 32 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i8_nxv32i64 = zext <vscale x 32 x i8> undef to <vscale x 32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i16_nxv32i32 = zext <vscale x 32 x i16> undef to <vscale x 32 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv32i16_nxv32i64 = zext <vscale x 32 x i16> undef to <vscale x 32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv32i32_nxv32i64 = zext <vscale x 32 x i32> undef to <vscale x 32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i1_nxv32i8 = zext <vscale x 32 x i1> undef to <vscale x 32 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv32i1_nxv32i16 = zext <vscale x 32 x i1> undef to <vscale x 32 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32i32 = zext <vscale x 32 x i1> undef to <vscale x 32 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32i64 = zext <vscale x 32 x i1> undef to <vscale x 32 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv64i8_nxv64i16 = zext <vscale x 64 x i8> undef to <vscale x 64 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %nxv64i8_nxv64i32 = zext <vscale x 64 x i8> undef to <vscale x 64 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %nxv64i8_nxv64i64 = zext <vscale x 64 x i8> undef to <vscale x 64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv64i16_nxv64i32 = zext <vscale x 64 x i16> undef to <vscale x 64 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %nxv64i16_nxv64i64 = zext <vscale x 64 x i16> undef to <vscale x 64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %nxv64i32_nxv64i64 = zext <vscale x 64 x i32> undef to <vscale x 64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv64i1_nxv64i8 = zext <vscale x 64 x i1> undef to <vscale x 64 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv64i1_nxv64i16 = zext <vscale x 64 x i1> undef to <vscale x 64 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64i32 = zext <vscale x 64 x i1> undef to <vscale x 64 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %nxv64i1_nxv64i64 = zext <vscale x 64 x i1> undef to <vscale x 64 x i64>
+; RV64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv128i8_nxv128i16 = zext <vscale x 128 x i8> undef to <vscale x 128 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %nxv128i8_nxv128i32 = zext <vscale x 128 x i8> undef to <vscale x 128 x i32>
; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i8_nxv128i128 = zext <vscale x 128 x i8> undef to <vscale x 128 x i128>
-; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv128i16_nxv128i32 = zext <vscale x 128 x i16> undef to <vscale x 128 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %nxv128i16_nxv128i32 = zext <vscale x 128 x i16> undef to <vscale x 128 x i32>
; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i16_nxv128i128 = zext <vscale x 128 x i16> undef to <vscale x 128 x i128>
; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i32_nxv128i128 = zext <vscale x 128 x i32> undef to <vscale x 128 x i128>
-; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv128i1_nxv128i8 = zext <vscale x 128 x i1> undef to <vscale x 128 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv128i1_nxv128i16 = zext <vscale x 128 x i1> undef to <vscale x 128 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv128i1_nxv128i32 = zext <vscale x 128 x i1> undef to <vscale x 128 x i32>
+; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %nxv128i1_nxv128i8 = zext <vscale x 128 x i1> undef to <vscale x 128 x i8>
+; RV64-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %nxv128i1_nxv128i16 = zext <vscale x 128 x i1> undef to <vscale x 128 x i16>
+; RV64-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %nxv128i1_nxv128i32 = zext <vscale x 128 x i1> undef to <vscale x 128 x i32>
; RV64-NEXT: Cost Model: Invalid cost for instruction: %nxv128i1_nxv128i128 = zext <vscale x 128 x i1> undef to <vscale x 128 x i128>
; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
index 80efe912c869..30cb32ce4eaf 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
@@ -1141,7 +1141,7 @@ define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv4i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
@@ -1157,7 +1157,7 @@ define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv4i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
@@ -1445,7 +1445,7 @@ define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv2i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
@@ -1461,7 +1461,7 @@ define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv2i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
@@ -1597,7 +1597,7 @@ define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
@@ -1613,7 +1613,7 @@ define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll
index 225bad6da591..aa7a90bece33 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll
@@ -12,12 +12,12 @@ define void @extractelement_int(i32 %x) {
; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
-; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
+; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_0 = extractelement <vscale x 2 x i1> undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_0 = extractelement <vscale x 4 x i1> undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_0 = extractelement <vscale x 8 x i1> undef, i32 0
-; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0
-; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0
+; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0
+; RV32V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
@@ -66,12 +66,12 @@ define void @extractelement_int(i32 %x) {
; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_1 = extractelement <4 x i1> undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_1 = extractelement <8 x i1> undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_1 = extractelement <16 x i1> undef, i32 1
-; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1
+; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_1 = extractelement <vscale x 2 x i1> undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_1 = extractelement <vscale x 4 x i1> undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_1 = extractelement <vscale x 8 x i1> undef, i32 1
-; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1
-; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1
+; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1
+; RV32V-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = extractelement <8 x i8> undef, i32 1
@@ -120,12 +120,12 @@ define void @extractelement_int(i32 %x) {
; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_x = extractelement <4 x i1> undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_x = extractelement <8 x i1> undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_x = extractelement <16 x i1> undef, i32 %x
-; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x
+; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_x = extractelement <vscale x 2 x i1> undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_x = extractelement <vscale x 4 x i1> undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_x = extractelement <vscale x 8 x i1> undef, i32 %x
-; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x
-; RV32V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x
+; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x
+; RV32V-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_x = extractelement <2 x i8> undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_x = extractelement <4 x i8> undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_x = extractelement <8 x i8> undef, i32 %x
@@ -177,12 +177,12 @@ define void @extractelement_int(i32 %x) {
; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
-; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
+; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_0 = extractelement <vscale x 2 x i1> undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_0 = extractelement <vscale x 4 x i1> undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_0 = extractelement <vscale x 8 x i1> undef, i32 0
-; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0
-; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0
+; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0
+; RV64V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
@@ -231,12 +231,12 @@ define void @extractelement_int(i32 %x) {
; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_1 = extractelement <4 x i1> undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_1 = extractelement <8 x i1> undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_1 = extractelement <16 x i1> undef, i32 1
-; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1
+; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_1 = extractelement <vscale x 2 x i1> undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_1 = extractelement <vscale x 4 x i1> undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_1 = extractelement <vscale x 8 x i1> undef, i32 1
-; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1
-; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1
+; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1
+; RV64V-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = extractelement <8 x i8> undef, i32 1
@@ -285,12 +285,12 @@ define void @extractelement_int(i32 %x) {
; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_x = extractelement <4 x i1> undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_x = extractelement <8 x i1> undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_x = extractelement <16 x i1> undef, i32 %x
-; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x
+; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_x = extractelement <vscale x 2 x i1> undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_x = extractelement <vscale x 4 x i1> undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_x = extractelement <vscale x 8 x i1> undef, i32 %x
-; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x
-; RV64V-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x
+; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x
+; RV64V-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_x = extractelement <2 x i8> undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_x = extractelement <4 x i8> undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_x = extractelement <8 x i8> undef, i32 %x
@@ -341,13 +341,13 @@ define void @extractelement_int(i32 %x) {
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_0 = extractelement <vscale x 2 x i1> undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_0 = extractelement <vscale x 4 x i1> undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_0 = extractelement <vscale x 8 x i1> undef, i32 0
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
@@ -395,13 +395,13 @@ define void @extractelement_int(i32 %x) {
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_1 = extractelement <4 x i1> undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_1 = extractelement <8 x i1> undef, i32 1
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_1 = extractelement <16 x i1> undef, i32 1
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = extractelement <16 x i1> undef, i32 1
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_1 = extractelement <vscale x 2 x i1> undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_1 = extractelement <vscale x 4 x i1> undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_1 = extractelement <vscale x 8 x i1> undef, i32 1
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = extractelement <8 x i8> undef, i32 1
@@ -449,13 +449,13 @@ define void @extractelement_int(i32 %x) {
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i1_x = extractelement <2 x i1> undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_x = extractelement <4 x i1> undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_x = extractelement <8 x i1> undef, i32 %x
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_x = extractelement <16 x i1> undef, i32 %x
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_x = extractelement <16 x i1> undef, i32 %x
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_x = extractelement <vscale x 2 x i1> undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_x = extractelement <vscale x 4 x i1> undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_x = extractelement <vscale x 8 x i1> undef, i32 %x
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_x = extractelement <2 x i8> undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_x = extractelement <4 x i8> undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_x = extractelement <8 x i8> undef, i32 %x
@@ -506,13 +506,13 @@ define void @extractelement_int(i32 %x) {
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_0 = extractelement <2 x i1> undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = extractelement <4 x i1> undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_0 = extractelement <8 x i1> undef, i32 0
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = extractelement <16 x i1> undef, i32 0
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i1_0 = extractelement <32 x i1> undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_0 = extractelement <vscale x 2 x i1> undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_0 = extractelement <vscale x 4 x i1> undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_0 = extractelement <vscale x 8 x i1> undef, i32 0
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = extractelement <vscale x 16 x i1> undef, i32 0
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv32i1_0 = extractelement <vscale x 32 x i1> undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
@@ -560,13 +560,13 @@ define void @extractelement_int(i32 %x) {
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i1_1 = extractelement <2 x i1> undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_1 = extractelement <4 x i1> undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_1 = extractelement <8 x i1> undef, i32 1
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_1 = extractelement <16 x i1> undef, i32 1
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = extractelement <16 x i1> undef, i32 1
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v32i1_1 = extractelement <32 x i1> undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_1 = extractelement <vscale x 2 x i1> undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_1 = extractelement <vscale x 4 x i1> undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_1 = extractelement <vscale x 8 x i1> undef, i32 1
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = extractelement <vscale x 16 x i1> undef, i32 1
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_1 = extractelement <vscale x 32 x i1> undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = extractelement <8 x i8> undef, i32 1
@@ -614,13 +614,13 @@ define void @extractelement_int(i32 %x) {
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i1_x = extractelement <2 x i1> undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i1_x = extractelement <4 x i1> undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i1_x = extractelement <8 x i1> undef, i32 %x
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_x = extractelement <16 x i1> undef, i32 %x
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_x = extractelement <16 x i1> undef, i32 %x
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v32i1_x = extractelement <32 x i1> undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_x = extractelement <vscale x 2 x i1> undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_x = extractelement <vscale x 4 x i1> undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_x = extractelement <vscale x 8 x i1> undef, i32 %x
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_x = extractelement <vscale x 16 x i1> undef, i32 %x
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv32i1_x = extractelement <vscale x 32 x i1> undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_x = extractelement <2 x i8> undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_x = extractelement <4 x i8> undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_x = extractelement <8 x i8> undef, i32 %x
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll
index 5387c8dc3594..6e1ae0216f76 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll
@@ -12,12 +12,12 @@ define void @insertelement_int(i32 %x) {
; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
-; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
+; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 0
-; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
-; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
+; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
+; RV32V-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
@@ -66,12 +66,12 @@ define void @insertelement_int(i32 %x) {
; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1
-; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
+; RV32V-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 1
-; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
-; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
+; RV32V-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
+; RV32V-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1
; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1
@@ -120,12 +120,12 @@ define void @insertelement_int(i32 %x) {
; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x
-; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
+; RV32V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement <vscale x 2 x i1> undef, i1 undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement <vscale x 4 x i1> undef, i1 undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement <vscale x 8 x i1> undef, i1 undef, i32 %x
-; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
-; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
+; RV32V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
+; RV32V-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x
; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x
@@ -177,12 +177,12 @@ define void @insertelement_int(i32 %x) {
; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
-; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
+; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 0
-; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
-; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
+; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
+; RV64V-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
@@ -231,12 +231,12 @@ define void @insertelement_int(i32 %x) {
; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1
-; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
+; RV64V-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 1
-; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
-; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
+; RV64V-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
+; RV64V-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1
; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1
@@ -285,12 +285,12 @@ define void @insertelement_int(i32 %x) {
; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x
-; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
+; RV64V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement <vscale x 2 x i1> undef, i1 undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement <vscale x 4 x i1> undef, i1 undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement <vscale x 8 x i1> undef, i1 undef, i32 %x
-; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
-; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
+; RV64V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
+; RV64V-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x
; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x
@@ -341,13 +341,13 @@ define void @insertelement_int(i32 %x) {
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 0
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
@@ -395,13 +395,13 @@ define void @insertelement_int(i32 %x) {
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 1
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1
@@ -449,13 +449,13 @@ define void @insertelement_int(i32 %x) {
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i1_x = insertelement <2 x i1> undef, i1 undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement <vscale x 2 x i1> undef, i1 undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement <vscale x 4 x i1> undef, i1 undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement <vscale x 8 x i1> undef, i1 undef, i32 %x
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
-; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
+; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x
; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x
@@ -506,13 +506,13 @@ define void @insertelement_int(i32 %x) {
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 0
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
@@ -560,13 +560,13 @@ define void @insertelement_int(i32 %x) {
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 1
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1
@@ -614,13 +614,13 @@ define void @insertelement_int(i32 %x) {
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i1_x = insertelement <2 x i1> undef, i1 undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement <vscale x 2 x i1> undef, i1 undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement <vscale x 4 x i1> undef, i1 undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement <vscale x 8 x i1> undef, i1 undef, i32 %x
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
-; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
+; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x
; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x
diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
index 46bf3152ac5b..b763198e98ba 100644
--- a/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
@@ -197,7 +197,7 @@ define void @broadcast_fixed() #0{
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %41 = shufflevector <32 x i1> undef, <32 x i1> undef, <32 x i32> zeroinitializer
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %42 = shufflevector <64 x i1> undef, <64 x i1> undef, <64 x i32> zeroinitializer
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %43 = shufflevector <128 x i1> undef, <128 x i1> undef, <128 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ins1 = insertelement <128 x i1> poison, i1 poison, i32 0
+; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ins1 = insertelement <128 x i1> poison, i1 poison, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %44 = shufflevector <128 x i1> %ins1, <128 x i1> poison, <128 x i32> zeroinitializer
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ins2 = insertelement <2 x i8> poison, i8 3, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = shufflevector <2 x i8> %ins2, <2 x i8> undef, <2 x i32> zeroinitializer
diff --git a/llvm/test/Bitcode/compatibility-3.6.ll b/llvm/test/Bitcode/compatibility-3.6.ll
index b1f4abf7b8c5..2190e2fbccf2 100644
--- a/llvm/test/Bitcode/compatibility-3.6.ll
+++ b/llvm/test/Bitcode/compatibility-3.6.ll
@@ -1061,16 +1061,16 @@ define void @instructions.va_arg(i8* %v, ...) {
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- ; CHECK: call void @llvm.va_start(ptr %ap2)
+ ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
va_arg i8* %ap2, i32
; CHECK: va_arg ptr %ap2, i32
call void @llvm.va_copy(i8* %v, i8* %ap2)
- ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+ ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2)
call void @llvm.va_end(i8* %ap2)
- ; CHECK: call void @llvm.va_end(ptr %ap2)
+ ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
ret void
}
@@ -1178,11 +1178,11 @@ define void @intrinsics.codegen() {
; CHECK: attributes #27 = { uwtable }
; CHECK: attributes #28 = { "cpu"="cortex-a8" }
; CHECK: attributes #29 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #30 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #31 = { nounwind memory(argmem: read) }
-; CHECK: attributes #32 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #33 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #34 = { nocallback nounwind }
+; CHECK: attributes #30 = { nounwind memory(argmem: read) }
+; CHECK: attributes #31 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #32 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #33 = { nocallback nounwind }
+; CHECK: attributes #34 = { nocallback nofree nosync nounwind willreturn }
; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
; CHECK: attributes #36 = { builtin }
diff --git a/llvm/test/Bitcode/compatibility-3.7.ll b/llvm/test/Bitcode/compatibility-3.7.ll
index 91e55f6eda59..7e59b5c1be6e 100644
--- a/llvm/test/Bitcode/compatibility-3.7.ll
+++ b/llvm/test/Bitcode/compatibility-3.7.ll
@@ -1092,16 +1092,16 @@ define void @instructions.va_arg(i8* %v, ...) {
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- ; CHECK: call void @llvm.va_start(ptr %ap2)
+ ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
va_arg i8* %ap2, i32
; CHECK: va_arg ptr %ap2, i32
call void @llvm.va_copy(i8* %v, i8* %ap2)
- ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+ ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2)
call void @llvm.va_end(i8* %ap2)
- ; CHECK: call void @llvm.va_end(ptr %ap2)
+ ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
ret void
}
@@ -1241,11 +1241,11 @@ define void @misc.metadata() {
; CHECK: attributes #30 = { uwtable }
; CHECK: attributes #31 = { "cpu"="cortex-a8" }
; CHECK: attributes #32 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #33 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #34 = { nounwind memory(argmem: read) }
-; CHECK: attributes #35 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #37 = { nocallback nounwind }
+; CHECK: attributes #33 = { nounwind memory(argmem: read) }
+; CHECK: attributes #34 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #36 = { nocallback nounwind }
+; CHECK: attributes #37 = { nocallback nofree nosync nounwind willreturn }
; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
; CHECK: attributes #39 = { builtin }
diff --git a/llvm/test/Bitcode/compatibility-3.8.ll b/llvm/test/Bitcode/compatibility-3.8.ll
index aa4d8b14968c..ebd1f2fff8c9 100644
--- a/llvm/test/Bitcode/compatibility-3.8.ll
+++ b/llvm/test/Bitcode/compatibility-3.8.ll
@@ -1247,16 +1247,16 @@ define void @instructions.va_arg(i8* %v, ...) {
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- ; CHECK: call void @llvm.va_start(ptr %ap2)
+ ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
va_arg i8* %ap2, i32
; CHECK: va_arg ptr %ap2, i32
call void @llvm.va_copy(i8* %v, i8* %ap2)
- ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+ ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2)
call void @llvm.va_end(i8* %ap2)
- ; CHECK: call void @llvm.va_end(ptr %ap2)
+ ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
ret void
}
@@ -1551,11 +1551,11 @@ normal:
; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) }
; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #37 = { nounwind memory(argmem: read) }
-; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #40 = { nocallback nounwind }
+; CHECK: attributes #36 = { nounwind memory(argmem: read) }
+; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #39 = { nocallback nounwind }
+; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn }
; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
; CHECK: attributes #42 = { builtin }
diff --git a/llvm/test/Bitcode/compatibility-3.9.ll b/llvm/test/Bitcode/compatibility-3.9.ll
index e3c84f6e6007..c34f04ceb0de 100644
--- a/llvm/test/Bitcode/compatibility-3.9.ll
+++ b/llvm/test/Bitcode/compatibility-3.9.ll
@@ -1318,16 +1318,16 @@ define void @instructions.va_arg(i8* %v, ...) {
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- ; CHECK: call void @llvm.va_start(ptr %ap2)
+ ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
va_arg i8* %ap2, i32
; CHECK: va_arg ptr %ap2, i32
call void @llvm.va_copy(i8* %v, i8* %ap2)
- ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+ ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2)
call void @llvm.va_end(i8* %ap2)
- ; CHECK: call void @llvm.va_end(ptr %ap2)
+ ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
ret void
}
@@ -1624,11 +1624,11 @@ declare void @f.writeonly() writeonly
; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) }
; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #37 = { nounwind memory(argmem: read) }
-; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #40 = { nocallback nounwind }
+; CHECK: attributes #36 = { nounwind memory(argmem: read) }
+; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #39 = { nocallback nounwind }
+; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn }
; CHECK: attributes #41 = { memory(write) }
; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
; CHECK: attributes #43 = { builtin }
diff --git a/llvm/test/Bitcode/compatibility-4.0.ll b/llvm/test/Bitcode/compatibility-4.0.ll
index 06cb842059a4..05bffda1d117 100644
--- a/llvm/test/Bitcode/compatibility-4.0.ll
+++ b/llvm/test/Bitcode/compatibility-4.0.ll
@@ -1318,16 +1318,16 @@ define void @instructions.va_arg(i8* %v, ...) {
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- ; CHECK: call void @llvm.va_start(ptr %ap2)
+ ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
va_arg i8* %ap2, i32
; CHECK: va_arg ptr %ap2, i32
call void @llvm.va_copy(i8* %v, i8* %ap2)
- ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+ ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2)
call void @llvm.va_end(i8* %ap2)
- ; CHECK: call void @llvm.va_end(ptr %ap2)
+ ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
ret void
}
@@ -1649,11 +1649,11 @@ define i8** @constexpr() {
; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) }
; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #37 = { nounwind memory(argmem: read) }
-; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #40 = { nocallback nounwind }
+; CHECK: attributes #36 = { nounwind memory(argmem: read) }
+; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #39 = { nocallback nounwind }
+; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn }
; CHECK: attributes #41 = { memory(write) }
; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
; CHECK: attributes #43 = { builtin }
diff --git a/llvm/test/Bitcode/compatibility-5.0.ll b/llvm/test/Bitcode/compatibility-5.0.ll
index f9ae558917cd..0c872289c62b 100644
--- a/llvm/test/Bitcode/compatibility-5.0.ll
+++ b/llvm/test/Bitcode/compatibility-5.0.ll
@@ -1330,16 +1330,16 @@ define void @instructions.va_arg(i8* %v, ...) {
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- ; CHECK: call void @llvm.va_start(ptr %ap2)
+ ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
va_arg i8* %ap2, i32
; CHECK: va_arg ptr %ap2, i32
call void @llvm.va_copy(i8* %v, i8* %ap2)
- ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+ ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2)
call void @llvm.va_end(i8* %ap2)
- ; CHECK: call void @llvm.va_end(ptr %ap2)
+ ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
ret void
}
@@ -1664,11 +1664,11 @@ define i8** @constexpr() {
; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) }
; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #37 = { nounwind memory(argmem: read) }
-; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #40 = { nocallback nounwind }
+; CHECK: attributes #36 = { nounwind memory(argmem: read) }
+; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #39 = { nocallback nounwind }
+; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn }
; CHECK: attributes #41 = { memory(write) }
; CHECK: attributes #42 = { speculatable }
; CHECK: attributes #43 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
diff --git a/llvm/test/Bitcode/compatibility-6.0.ll b/llvm/test/Bitcode/compatibility-6.0.ll
index 1458e1b639ad..44c680885be3 100644
--- a/llvm/test/Bitcode/compatibility-6.0.ll
+++ b/llvm/test/Bitcode/compatibility-6.0.ll
@@ -1340,16 +1340,16 @@ define void @instructions.va_arg(i8* %v, ...) {
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- ; CHECK: call void @llvm.va_start(ptr %ap2)
+ ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
va_arg i8* %ap2, i32
; CHECK: va_arg ptr %ap2, i32
call void @llvm.va_copy(i8* %v, i8* %ap2)
- ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+ ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap2)
call void @llvm.va_end(i8* %ap2)
- ; CHECK: call void @llvm.va_end(ptr %ap2)
+ ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
ret void
}
@@ -1674,11 +1674,11 @@ define i8** @constexpr() {
; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) }
; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #37 = { nounwind memory(argmem: read) }
-; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #40 = { nocallback nounwind }
+; CHECK: attributes #36 = { nounwind memory(argmem: read) }
+; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #39 = { nocallback nounwind }
+; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn }
; CHECK: attributes #41 = { memory(write) }
; CHECK: attributes #42 = { speculatable }
; CHECK: attributes #43 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index fa8b0520567a..b374924516d6 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -1648,16 +1648,16 @@ define void @instructions.va_arg(ptr %v, ...) {
%ap = alloca ptr
call void @llvm.va_start(ptr %ap)
- ; CHECK: call void @llvm.va_start(ptr %ap)
+ ; CHECK: call void @llvm.va_start.p0(ptr %ap)
va_arg ptr %ap, i32
; CHECK: va_arg ptr %ap, i32
call void @llvm.va_copy(ptr %v, ptr %ap)
- ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap)
+ ; CHECK: call void @llvm.va_copy.p0(ptr %v, ptr %ap)
call void @llvm.va_end(ptr %ap)
- ; CHECK: call void @llvm.va_end(ptr %ap)
+ ; CHECK: call void @llvm.va_end.p0(ptr %ap)
ret void
}
@@ -2091,12 +2091,12 @@ define float @nofpclass_callsites(float %arg) {
; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) }
; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #37 = { nounwind memory(argmem: read) }
-; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #40 = { nocallback nounwind }
-; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
+; CHECK: attributes #36 = { nounwind memory(argmem: read) }
+; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #39 = { nocallback nounwind }
+; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
+; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn }
; CHECK: attributes #42 = { memory(write) }
; CHECK: attributes #43 = { speculatable }
; CHECK: attributes #44 = { strictfp }
diff --git a/llvm/test/Bitcode/thinlto-function-summary.ll b/llvm/test/Bitcode/thinlto-function-summary.ll
index 799759ebcac1..13c6611843d6 100644
--- a/llvm/test/Bitcode/thinlto-function-summary.ll
+++ b/llvm/test/Bitcode/thinlto-function-summary.ll
@@ -13,9 +13,9 @@
; "variadic"
; BC-NEXT: <FUNCTION op0=46 op1=8
; "llvm.va_start"
-; BC-NEXT: <FUNCTION op0=54 op1=13
+; BC-NEXT: <FUNCTION op0=54 op1=16
; "f"
-; BC-NEXT: <ALIAS op0=67 op1=1
+; BC-NEXT: <ALIAS op0=70 op1=1
; BC: <GLOBALVAL_SUMMARY_BLOCK
; BC-NEXT: <VERSION
; BC-NEXT: <FLAGS
@@ -26,7 +26,7 @@
; BC-NEXT: <ALIAS {{.*}} op0=6 op1=0 op2=3
; BC-NEXT: </GLOBALVAL_SUMMARY_BLOCK
; BC: <STRTAB_BLOCK
-; BC-NEXT: blob data = 'hfoobaranon.{{................................}}.0variadicllvm.va_startf{{.*}}'
+; BC-NEXT: blob data = 'hfoobaranon.{{................................}}.0variadicllvm.va_start.p{{[0-9]+}}f{{.*}}'
; RUN: opt -passes=name-anon-globals -module-summary < %s | llvm-dis | FileCheck %s
diff --git a/llvm/test/Bitcode/variableArgumentIntrinsic.3.2.ll b/llvm/test/Bitcode/variableArgumentIntrinsic.3.2.ll
index fad7b8ea6a58..fd3f500de791 100644
--- a/llvm/test/Bitcode/variableArgumentIntrinsic.3.2.ll
+++ b/llvm/test/Bitcode/variableArgumentIntrinsic.3.2.ll
@@ -10,7 +10,7 @@ define i32 @varArgIntrinsic(i32 %X, ...) {
%ap = alloca i8*
%ap2 = bitcast i8** %ap to i8*
-; CHECK: call void @llvm.va_start(ptr %ap2)
+; CHECK: call void @llvm.va_start.p0(ptr %ap2)
call void @llvm.va_start(i8* %ap2)
; CHECK-NEXT: %tmp = va_arg ptr %ap, i32
@@ -19,12 +19,12 @@ define i32 @varArgIntrinsic(i32 %X, ...) {
%aq = alloca i8*
%aq2 = bitcast i8** %aq to i8*
-; CHECK: call void @llvm.va_copy(ptr %aq2, ptr %ap2)
+; CHECK: call void @llvm.va_copy.p0(ptr %aq2, ptr %ap2)
call void @llvm.va_copy(i8* %aq2, i8* %ap2)
-; CHECK-NEXT: call void @llvm.va_end(ptr %aq2)
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr %aq2)
call void @llvm.va_end(i8* %aq2)
-; CHECK-NEXT: call void @llvm.va_end(ptr %ap2)
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr %ap2)
call void @llvm.va_end(i8* %ap2)
ret i32 %tmp
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
index 089e171e5a4a..c9fd2d38e27a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -518,4 +518,6 @@ attributes #5 = { nobuiltin }
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!"short", !1}
-!4 = !{i64 0, i64 4, !0, i64 4, i64 2, !3, i64 8, i64 4, !0, i64 12, i64 2, !3, i64 16, i64 4, !0, i64 20, i64 2, !3}
+!4 = !{i64 0, i64 4, !5, i64 4, i64 2, !6, i64 8, i64 4, !5, i64 12, i64 2, !6, i64 16, i64 4, !5, i64 20, i64 2, !6}
+!5 = !{!0, !0, i64 0}
+!6 = !{!3, !3, i64 0}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
index 942f459ea6b8..8ddaf243db92 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
@@ -808,7 +808,7 @@ define float @test_pown_fast_f32_nobuiltin(float %x, i32 %y) {
; CHECK-LABEL: define float @test_pown_fast_f32_nobuiltin
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @_Z4pownfi(float [[X]], i32 [[Y]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @_Z4pownfi(float [[X]], i32 [[Y]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[CALL]]
;
entry:
@@ -820,11 +820,11 @@ define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 {
; CHECK-LABEL: define float @test_pown_fast_f32_strictfp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]])
-; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]]
-; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]])
+; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]]) #[[ATTR0]]
+; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]]) #[[ATTR0]]
+; CHECK-NEXT: [[POWNI2F:%.*]] = call fast float @llvm.experimental.constrained.sitofp.f32.i32(i32 [[Y]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: [[__YLOGX:%.*]] = call fast float @llvm.experimental.constrained.fmul.f32(float [[POWNI2F]], float [[__LOG2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]]) #[[ATTR0]]
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
index 2ffa647d1869..2e64a3456c24 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
@@ -896,7 +896,7 @@ define float @test_rootn_f32__y_neg2__strictfp(float %x) #1 {
; CHECK-LABEL: define float @test_rootn_f32__y_neg2__strictfp(
; CHECK-SAME: float [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = call float @_Z5rsqrtf(float [[X]])
+; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = call float @_Z5rsqrtf(float [[X]]) #[[ATTR0]]
; CHECK-NEXT: ret float [[__ROOTN2RSQRT]]
;
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll
index 76ec1cc84f55..99d02ffaa523 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll
@@ -358,65 +358,6 @@ define amdgpu_gfx i32 @global_atomic_xchg_i32_ret_offset_scalar(ptr addrspace(1)
; ---------------------------------------------------------------------
define void @global_atomic_xchg_f32_noret(ptr addrspace(1) %ptr, float %in) {
-; GCN1-LABEL: global_atomic_xchg_f32_noret:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_load_dword v3, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[4:5], 0
-; GCN1-NEXT: .LBB0_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN1-NEXT: v_mov_b32_e32 v3, v4
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_cbranch_execnz .LBB0_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f32_noret:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_load_dword v3, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[4:5], 0
-; GCN2-NEXT: .LBB0_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN2-NEXT: v_mov_b32_e32 v3, v4
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_cbranch_execnz .LBB0_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f32_noret:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_load_dword v3, v[0:1]
-; GCN3-NEXT: s_mov_b64 s[4:5], 0
-; GCN3-NEXT: .LBB0_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN3-NEXT: v_mov_b32_e32 v3, v4
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN3-NEXT: s_cbranch_execnz .LBB0_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f32_noret:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -450,69 +391,6 @@ define void @global_atomic_xchg_f32_noret(ptr addrspace(1) %ptr, float %in) {
}
define void @global_atomic_xchg_f32_noret_offset(ptr addrspace(1) %out, float %in) {
-; GCN1-LABEL: global_atomic_xchg_f32_noret_offset:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_add_f32_e32 v0, vcc, 16, v0
-; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GCN1-NEXT: global_load_dword v3, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[4:5], 0
-; GCN1-NEXT: .LBB1_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN1-NEXT: v_mov_b32_e32 v3, v4
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_cbranch_execnz .LBB1_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f32_noret_offset:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0
-; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GCN2-NEXT: global_load_dword v3, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[4:5], 0
-; GCN2-NEXT: .LBB1_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN2-NEXT: v_mov_b32_e32 v3, v4
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_cbranch_execnz .LBB1_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f32_noret_offset:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_load_dword v3, v[0:1] offset:16
-; GCN3-NEXT: s_mov_b64 s[4:5], 0
-; GCN3-NEXT: .LBB1_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] offset:16 glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN3-NEXT: v_mov_b32_e32 v3, v4
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN3-NEXT: s_cbranch_execnz .LBB1_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f32_noret_offset:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -549,71 +427,6 @@ define void @global_atomic_xchg_f32_noret_offset(ptr addrspace(1) %out, float %i
}
define float @global_atomic_xchg_f32_ret(ptr addrspace(1) %ptr, float %in) {
-; GCN1-LABEL: global_atomic_xchg_f32_ret:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_load_dword v4, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[4:5], 0
-; GCN1-NEXT: .LBB2_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_mov_b32_e32 v3, v4
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_cbranch_execnz .LBB2_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN1-NEXT: v_mov_b32_e32 v0, v4
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f32_ret:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_load_dword v4, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[4:5], 0
-; GCN2-NEXT: .LBB2_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_mov_b32_e32 v3, v4
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_cbranch_execnz .LBB2_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN2-NEXT: v_mov_b32_e32 v0, v4
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f32_ret:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_load_dword v4, v[0:1]
-; GCN3-NEXT: s_mov_b64 s[4:5], 0
-; GCN3-NEXT: .LBB2_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v3, v4
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN3-NEXT: s_cbranch_execnz .LBB2_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN3-NEXT: v_mov_b32_e32 v0, v4
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f32_ret:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -648,73 +461,6 @@ define float @global_atomic_xchg_f32_ret(ptr addrspace(1) %ptr, float %in) {
}
define float @global_atomic_xchg_f32_ret_offset(ptr addrspace(1) %out, float %in) {
-; GCN1-LABEL: global_atomic_xchg_f32_ret_offset:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_add_f32_e32 v4, vcc, 16, v0
-; GCN1-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT: global_load_dword v0, v[4:5]
-; GCN1-NEXT: s_mov_b64 s[4:5], 0
-; GCN1-NEXT: .LBB3_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_mov_b32_e32 v3, v0
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v0, v[4:5], v[2:3] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3
-; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_cbranch_execnz .LBB3_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f32_ret_offset:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_add_u32_e32 v4, vcc, 16, v0
-; GCN2-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT: global_load_dword v0, v[4:5]
-; GCN2-NEXT: s_mov_b64 s[4:5], 0
-; GCN2-NEXT: .LBB3_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_mov_b32_e32 v3, v0
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v0, v[4:5], v[2:3] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3
-; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_cbranch_execnz .LBB3_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f32_ret_offset:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_load_dword v4, v[0:1] offset:16
-; GCN3-NEXT: s_mov_b64 s[4:5], 0
-; GCN3-NEXT: .LBB3_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v3, v4
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] offset:16 glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN3-NEXT: s_cbranch_execnz .LBB3_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN3-NEXT: v_mov_b32_e32 v0, v4
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f32_ret_offset:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -752,80 +498,6 @@ define float @global_atomic_xchg_f32_ret_offset(ptr addrspace(1) %out, float %in
}
define amdgpu_gfx void @global_atomic_xchg_f32_noret_scalar(ptr addrspace(1) inreg %ptr, float inreg %in) {
-; GCN1-LABEL: global_atomic_xchg_f32_noret_scalar:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_mov_b32_e32 v0, s4
-; GCN1-NEXT: v_mov_b32_e32 v1, s5
-; GCN1-NEXT: global_load_dword v1, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[34:35], 0
-; GCN1-NEXT: .LBB4_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: v_mov_b32_e32 v2, s4
-; GCN1-NEXT: v_mov_b32_e32 v0, s6
-; GCN1-NEXT: v_mov_b32_e32 v3, s5
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GCN1-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN1-NEXT: v_mov_b32_e32 v1, v0
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN1-NEXT: s_cbranch_execnz .LBB4_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f32_noret_scalar:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_mov_b32_e32 v0, s4
-; GCN2-NEXT: v_mov_b32_e32 v1, s5
-; GCN2-NEXT: global_load_dword v1, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[34:35], 0
-; GCN2-NEXT: .LBB4_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: v_mov_b32_e32 v2, s4
-; GCN2-NEXT: v_mov_b32_e32 v0, s6
-; GCN2-NEXT: v_mov_b32_e32 v3, s5
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GCN2-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN2-NEXT: v_mov_b32_e32 v1, v0
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN2-NEXT: s_cbranch_execnz .LBB4_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f32_noret_scalar:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v0, s4
-; GCN3-NEXT: v_mov_b32_e32 v1, s5
-; GCN3-NEXT: global_load_dword v1, v[0:1]
-; GCN3-NEXT: s_mov_b64 s[34:35], 0
-; GCN3-NEXT: .LBB4_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: v_mov_b32_e32 v2, s4
-; GCN3-NEXT: v_mov_b32_e32 v0, s6
-; GCN3-NEXT: v_mov_b32_e32 v3, s5
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN3-NEXT: v_mov_b32_e32 v1, v0
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_cbranch_execnz .LBB4_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f32_noret_scalar:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -876,84 +548,6 @@ define amdgpu_gfx void @global_atomic_xchg_f32_noret_scalar(ptr addrspace(1) inr
}
define amdgpu_gfx void @global_atomic_xchg_f32_noret_offset_scalar(ptr addrspace(1) inreg %out, float inreg %in) {
-; GCN1-LABEL: global_atomic_xchg_f32_noret_offset_scalar:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: s_add_u32 s34, s4, 16
-; GCN1-NEXT: s_addc_u32 s35, s5, 0
-; GCN1-NEXT: v_mov_b32_e32 v0, s34
-; GCN1-NEXT: v_mov_b32_e32 v1, s35
-; GCN1-NEXT: global_load_dword v1, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[36:37], 0
-; GCN1-NEXT: .LBB5_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: v_mov_b32_e32 v2, s34
-; GCN1-NEXT: v_mov_b32_e32 v0, s6
-; GCN1-NEXT: v_mov_b32_e32 v3, s35
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GCN1-NEXT: s_or_b64 s[36:37], vcc, s[36:37]
-; GCN1-NEXT: v_mov_b32_e32 v1, v0
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[36:37]
-; GCN1-NEXT: s_cbranch_execnz .LBB5_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[36:37]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f32_noret_offset_scalar:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: s_add_u32 s34, s4, 16
-; GCN2-NEXT: s_addc_u32 s35, s5, 0
-; GCN2-NEXT: v_mov_b32_e32 v0, s34
-; GCN2-NEXT: v_mov_b32_e32 v1, s35
-; GCN2-NEXT: global_load_dword v1, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[36:37], 0
-; GCN2-NEXT: .LBB5_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: v_mov_b32_e32 v2, s34
-; GCN2-NEXT: v_mov_b32_e32 v0, s6
-; GCN2-NEXT: v_mov_b32_e32 v3, s35
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GCN2-NEXT: s_or_b64 s[36:37], vcc, s[36:37]
-; GCN2-NEXT: v_mov_b32_e32 v1, v0
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[36:37]
-; GCN2-NEXT: s_cbranch_execnz .LBB5_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[36:37]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f32_noret_offset_scalar:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v0, s4
-; GCN3-NEXT: v_mov_b32_e32 v1, s5
-; GCN3-NEXT: global_load_dword v1, v[0:1] offset:16
-; GCN3-NEXT: s_mov_b64 s[34:35], 0
-; GCN3-NEXT: .LBB5_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: v_mov_b32_e32 v2, s4
-; GCN3-NEXT: v_mov_b32_e32 v0, s6
-; GCN3-NEXT: v_mov_b32_e32 v3, s5
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN3-NEXT: v_mov_b32_e32 v1, v0
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_cbranch_execnz .LBB5_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f32_noret_offset_scalar:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1007,83 +601,6 @@ define amdgpu_gfx void @global_atomic_xchg_f32_noret_offset_scalar(ptr addrspace
}
define amdgpu_gfx float @global_atomic_xchg_f32_ret_scalar(ptr addrspace(1) inreg %ptr, float inreg %in) {
-; GCN1-LABEL: global_atomic_xchg_f32_ret_scalar:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_mov_b32_e32 v0, s4
-; GCN1-NEXT: v_mov_b32_e32 v1, s5
-; GCN1-NEXT: global_load_dword v0, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[34:35], 0
-; GCN1-NEXT: .LBB6_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: v_mov_b32_e32 v3, s4
-; GCN1-NEXT: v_mov_b32_e32 v1, s6
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_mov_b32_e32 v2, v0
-; GCN1-NEXT: v_mov_b32_e32 v4, s5
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2
-; GCN1-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN1-NEXT: s_cbranch_execnz .LBB6_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f32_ret_scalar:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_mov_b32_e32 v0, s4
-; GCN2-NEXT: v_mov_b32_e32 v1, s5
-; GCN2-NEXT: global_load_dword v0, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[34:35], 0
-; GCN2-NEXT: .LBB6_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: v_mov_b32_e32 v3, s4
-; GCN2-NEXT: v_mov_b32_e32 v1, s6
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_mov_b32_e32 v2, v0
-; GCN2-NEXT: v_mov_b32_e32 v4, s5
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2
-; GCN2-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN2-NEXT: s_cbranch_execnz .LBB6_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f32_ret_scalar:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v0, s4
-; GCN3-NEXT: v_mov_b32_e32 v1, s5
-; GCN3-NEXT: global_load_dword v0, v[0:1]
-; GCN3-NEXT: s_mov_b64 s[34:35], 0
-; GCN3-NEXT: .LBB6_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: v_mov_b32_e32 v3, s4
-; GCN3-NEXT: v_mov_b32_e32 v1, s6
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v2, v0
-; GCN3-NEXT: v_mov_b32_e32 v4, s5
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2
-; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_cbranch_execnz .LBB6_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f32_ret_scalar:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1134,87 +651,6 @@ define amdgpu_gfx float @global_atomic_xchg_f32_ret_scalar(ptr addrspace(1) inre
}
define amdgpu_gfx float @global_atomic_xchg_f32_ret_offset_scalar(ptr addrspace(1) inreg %out, float inreg %in) {
-; GCN1-LABEL: global_atomic_xchg_f32_ret_offset_scalar:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: s_add_u32 s34, s4, 16
-; GCN1-NEXT: s_addc_u32 s35, s5, 0
-; GCN1-NEXT: v_mov_b32_e32 v0, s34
-; GCN1-NEXT: v_mov_b32_e32 v1, s35
-; GCN1-NEXT: global_load_dword v0, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[36:37], 0
-; GCN1-NEXT: .LBB7_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: v_mov_b32_e32 v3, s34
-; GCN1-NEXT: v_mov_b32_e32 v1, s6
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_mov_b32_e32 v2, v0
-; GCN1-NEXT: v_mov_b32_e32 v4, s35
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2
-; GCN1-NEXT: s_or_b64 s[36:37], vcc, s[36:37]
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[36:37]
-; GCN1-NEXT: s_cbranch_execnz .LBB7_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[36:37]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f32_ret_offset_scalar:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: s_add_u32 s34, s4, 16
-; GCN2-NEXT: s_addc_u32 s35, s5, 0
-; GCN2-NEXT: v_mov_b32_e32 v0, s34
-; GCN2-NEXT: v_mov_b32_e32 v1, s35
-; GCN2-NEXT: global_load_dword v0, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[36:37], 0
-; GCN2-NEXT: .LBB7_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: v_mov_b32_e32 v3, s34
-; GCN2-NEXT: v_mov_b32_e32 v1, s6
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_mov_b32_e32 v2, v0
-; GCN2-NEXT: v_mov_b32_e32 v4, s35
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2
-; GCN2-NEXT: s_or_b64 s[36:37], vcc, s[36:37]
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[36:37]
-; GCN2-NEXT: s_cbranch_execnz .LBB7_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[36:37]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f32_ret_offset_scalar:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v0, s4
-; GCN3-NEXT: v_mov_b32_e32 v1, s5
-; GCN3-NEXT: global_load_dword v0, v[0:1] offset:16
-; GCN3-NEXT: s_mov_b64 s[34:35], 0
-; GCN3-NEXT: .LBB7_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: v_mov_b32_e32 v3, s4
-; GCN3-NEXT: v_mov_b32_e32 v1, s6
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v2, v0
-; GCN3-NEXT: v_mov_b32_e32 v4, s5
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] offset:16 glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2
-; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_cbranch_execnz .LBB7_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f32_ret_offset_scalar:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll
index d137f471910d..380ce7f3b939 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll
@@ -372,65 +372,6 @@ define amdgpu_gfx i64 @global_atomic_xchg_i64_ret_offset_scalar(ptr addrspace(1)
; ---------------------------------------------------------------------
define void @global_atomic_xchg_f64_noret(ptr addrspace(1) %ptr, double %in) {
-; GCN1-LABEL: global_atomic_xchg_f64_noret:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_load_dword v3, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[4:5], 0
-; GCN1-NEXT: .LBB0_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN1-NEXT: v_mov_b32_e32 v3, v4
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_cbranch_execnz .LBB0_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f64_noret:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_load_dword v3, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[4:5], 0
-; GCN2-NEXT: .LBB0_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN2-NEXT: v_mov_b32_e32 v3, v4
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_cbranch_execnz .LBB0_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f64_noret:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_load_dword v3, v[0:1]
-; GCN3-NEXT: s_mov_b64 s[4:5], 0
-; GCN3-NEXT: .LBB0_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN3-NEXT: v_mov_b32_e32 v3, v4
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN3-NEXT: s_cbranch_execnz .LBB0_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f64_noret:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -464,69 +405,6 @@ define void @global_atomic_xchg_f64_noret(ptr addrspace(1) %ptr, double %in) {
}
define void @global_atomic_xchg_f64_noret_offset(ptr addrspace(1) %out, double %in) {
-; GCN1-LABEL: global_atomic_xchg_f64_noret_offset:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_add_f64_e32 v0, vcc, 16, v0
-; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GCN1-NEXT: global_load_dword v3, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[4:5], 0
-; GCN1-NEXT: .LBB1_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN1-NEXT: v_mov_b32_e32 v3, v4
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_cbranch_execnz .LBB1_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f64_noret_offset:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0
-; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GCN2-NEXT: global_load_dword v3, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[4:5], 0
-; GCN2-NEXT: .LBB1_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN2-NEXT: v_mov_b32_e32 v3, v4
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_cbranch_execnz .LBB1_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f64_noret_offset:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_load_dword v3, v[0:1] offset:16
-; GCN3-NEXT: s_mov_b64 s[4:5], 0
-; GCN3-NEXT: .LBB1_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] offset:16 glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN3-NEXT: v_mov_b32_e32 v3, v4
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN3-NEXT: s_cbranch_execnz .LBB1_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f64_noret_offset:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -563,71 +441,6 @@ define void @global_atomic_xchg_f64_noret_offset(ptr addrspace(1) %out, double %
}
define double @global_atomic_xchg_f64_ret(ptr addrspace(1) %ptr, double %in) {
-; GCN1-LABEL: global_atomic_xchg_f64_ret:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_load_dword v4, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[4:5], 0
-; GCN1-NEXT: .LBB2_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_mov_b32_e32 v3, v4
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_cbranch_execnz .LBB2_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN1-NEXT: v_mov_b32_e32 v0, v4
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f64_ret:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_load_dword v4, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[4:5], 0
-; GCN2-NEXT: .LBB2_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_mov_b32_e32 v3, v4
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_cbranch_execnz .LBB2_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN2-NEXT: v_mov_b32_e32 v0, v4
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f64_ret:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_load_dword v4, v[0:1]
-; GCN3-NEXT: s_mov_b64 s[4:5], 0
-; GCN3-NEXT: .LBB2_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v3, v4
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN3-NEXT: s_cbranch_execnz .LBB2_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN3-NEXT: v_mov_b32_e32 v0, v4
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f64_ret:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -663,73 +476,6 @@ define double @global_atomic_xchg_f64_ret(ptr addrspace(1) %ptr, double %in) {
}
define double @global_atomic_xchg_f64_ret_offset(ptr addrspace(1) %out, double %in) {
-; GCN1-LABEL: global_atomic_xchg_f64_ret_offset:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_add_f64_e32 v4, vcc, 16, v0
-; GCN1-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT: global_load_dword v0, v[4:5]
-; GCN1-NEXT: s_mov_b64 s[4:5], 0
-; GCN1-NEXT: .LBB3_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_mov_b32_e32 v3, v0
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v0, v[4:5], v[2:3] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3
-; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_cbranch_execnz .LBB3_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f64_ret_offset:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_add_u32_e32 v4, vcc, 16, v0
-; GCN2-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT: global_load_dword v0, v[4:5]
-; GCN2-NEXT: s_mov_b64 s[4:5], 0
-; GCN2-NEXT: .LBB3_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_mov_b32_e32 v3, v0
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v0, v[4:5], v[2:3] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3
-; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_cbranch_execnz .LBB3_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f64_ret_offset:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_load_dword v4, v[0:1] offset:16
-; GCN3-NEXT: s_mov_b64 s[4:5], 0
-; GCN3-NEXT: .LBB3_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v3, v4
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v4, v[0:1], v[2:3] offset:16 glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
-; GCN3-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN3-NEXT: s_cbranch_execnz .LBB3_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN3-NEXT: v_mov_b32_e32 v0, v4
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f64_ret_offset:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -768,80 +514,6 @@ define double @global_atomic_xchg_f64_ret_offset(ptr addrspace(1) %out, double %
}
define amdgpu_gfx void @global_atomic_xchg_f64_noret_scalar(ptr addrspace(1) inreg %ptr, double inreg %in) {
-; GCN1-LABEL: global_atomic_xchg_f64_noret_scalar:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_mov_b32_e32 v0, s4
-; GCN1-NEXT: v_mov_b32_e32 v1, s5
-; GCN1-NEXT: global_load_dword v1, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[34:35], 0
-; GCN1-NEXT: .LBB4_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: v_mov_b32_e32 v2, s4
-; GCN1-NEXT: v_mov_b32_e32 v0, s6
-; GCN1-NEXT: v_mov_b32_e32 v3, s5
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GCN1-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN1-NEXT: v_mov_b32_e32 v1, v0
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN1-NEXT: s_cbranch_execnz .LBB4_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f64_noret_scalar:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_mov_b32_e32 v0, s4
-; GCN2-NEXT: v_mov_b32_e32 v1, s5
-; GCN2-NEXT: global_load_dword v1, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[34:35], 0
-; GCN2-NEXT: .LBB4_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: v_mov_b32_e32 v2, s4
-; GCN2-NEXT: v_mov_b32_e32 v0, s6
-; GCN2-NEXT: v_mov_b32_e32 v3, s5
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GCN2-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN2-NEXT: v_mov_b32_e32 v1, v0
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN2-NEXT: s_cbranch_execnz .LBB4_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f64_noret_scalar:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v0, s4
-; GCN3-NEXT: v_mov_b32_e32 v1, s5
-; GCN3-NEXT: global_load_dword v1, v[0:1]
-; GCN3-NEXT: s_mov_b64 s[34:35], 0
-; GCN3-NEXT: .LBB4_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: v_mov_b32_e32 v2, s4
-; GCN3-NEXT: v_mov_b32_e32 v0, s6
-; GCN3-NEXT: v_mov_b32_e32 v3, s5
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN3-NEXT: v_mov_b32_e32 v1, v0
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_cbranch_execnz .LBB4_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f64_noret_scalar:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -896,84 +568,6 @@ define amdgpu_gfx void @global_atomic_xchg_f64_noret_scalar(ptr addrspace(1) inr
}
define amdgpu_gfx void @global_atomic_xchg_f64_noret_offset_scalar(ptr addrspace(1) inreg %out, double inreg %in) {
-; GCN1-LABEL: global_atomic_xchg_f64_noret_offset_scalar:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: s_add_u32 s34, s4, 16
-; GCN1-NEXT: s_addc_u32 s35, s5, 0
-; GCN1-NEXT: v_mov_b32_e32 v0, s34
-; GCN1-NEXT: v_mov_b32_e32 v1, s35
-; GCN1-NEXT: global_load_dword v1, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[36:37], 0
-; GCN1-NEXT: .LBB5_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: v_mov_b32_e32 v2, s34
-; GCN1-NEXT: v_mov_b32_e32 v0, s6
-; GCN1-NEXT: v_mov_b32_e32 v3, s35
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GCN1-NEXT: s_or_b64 s[36:37], vcc, s[36:37]
-; GCN1-NEXT: v_mov_b32_e32 v1, v0
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[36:37]
-; GCN1-NEXT: s_cbranch_execnz .LBB5_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[36:37]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f64_noret_offset_scalar:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: s_add_u32 s34, s4, 16
-; GCN2-NEXT: s_addc_u32 s35, s5, 0
-; GCN2-NEXT: v_mov_b32_e32 v0, s34
-; GCN2-NEXT: v_mov_b32_e32 v1, s35
-; GCN2-NEXT: global_load_dword v1, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[36:37], 0
-; GCN2-NEXT: .LBB5_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: v_mov_b32_e32 v2, s34
-; GCN2-NEXT: v_mov_b32_e32 v0, s6
-; GCN2-NEXT: v_mov_b32_e32 v3, s35
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GCN2-NEXT: s_or_b64 s[36:37], vcc, s[36:37]
-; GCN2-NEXT: v_mov_b32_e32 v1, v0
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[36:37]
-; GCN2-NEXT: s_cbranch_execnz .LBB5_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[36:37]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f64_noret_offset_scalar:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v0, s4
-; GCN3-NEXT: v_mov_b32_e32 v1, s5
-; GCN3-NEXT: global_load_dword v1, v[0:1] offset:16
-; GCN3-NEXT: s_mov_b64 s[34:35], 0
-; GCN3-NEXT: .LBB5_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: v_mov_b32_e32 v2, s4
-; GCN3-NEXT: v_mov_b32_e32 v0, s6
-; GCN3-NEXT: v_mov_b32_e32 v3, s5
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN3-NEXT: v_mov_b32_e32 v1, v0
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_cbranch_execnz .LBB5_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f64_noret_offset_scalar:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1029,83 +623,6 @@ define amdgpu_gfx void @global_atomic_xchg_f64_noret_offset_scalar(ptr addrspace
}
define amdgpu_gfx double @global_atomic_xchg_f64_ret_scalar(ptr addrspace(1) inreg %ptr, double inreg %in) {
-; GCN1-LABEL: global_atomic_xchg_f64_ret_scalar:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_mov_b32_e32 v0, s4
-; GCN1-NEXT: v_mov_b32_e32 v1, s5
-; GCN1-NEXT: global_load_dword v0, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[34:35], 0
-; GCN1-NEXT: .LBB6_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: v_mov_b32_e32 v3, s4
-; GCN1-NEXT: v_mov_b32_e32 v1, s6
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_mov_b32_e32 v2, v0
-; GCN1-NEXT: v_mov_b32_e32 v4, s5
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2
-; GCN1-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN1-NEXT: s_cbranch_execnz .LBB6_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f64_ret_scalar:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_mov_b32_e32 v0, s4
-; GCN2-NEXT: v_mov_b32_e32 v1, s5
-; GCN2-NEXT: global_load_dword v0, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[34:35], 0
-; GCN2-NEXT: .LBB6_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: v_mov_b32_e32 v3, s4
-; GCN2-NEXT: v_mov_b32_e32 v1, s6
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_mov_b32_e32 v2, v0
-; GCN2-NEXT: v_mov_b32_e32 v4, s5
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2
-; GCN2-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN2-NEXT: s_cbranch_execnz .LBB6_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f64_ret_scalar:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v0, s4
-; GCN3-NEXT: v_mov_b32_e32 v1, s5
-; GCN3-NEXT: global_load_dword v0, v[0:1]
-; GCN3-NEXT: s_mov_b64 s[34:35], 0
-; GCN3-NEXT: .LBB6_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: v_mov_b32_e32 v3, s4
-; GCN3-NEXT: v_mov_b32_e32 v1, s6
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v2, v0
-; GCN3-NEXT: v_mov_b32_e32 v4, s5
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2
-; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_cbranch_execnz .LBB6_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f64_ret_scalar:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1160,87 +677,6 @@ define amdgpu_gfx double @global_atomic_xchg_f64_ret_scalar(ptr addrspace(1) inr
}
define amdgpu_gfx double @global_atomic_xchg_f64_ret_offset_scalar(ptr addrspace(1) inreg %out, double inreg %in) {
-; GCN1-LABEL: global_atomic_xchg_f64_ret_offset_scalar:
-; GCN1: ; %bb.0:
-; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: s_add_u32 s34, s4, 16
-; GCN1-NEXT: s_addc_u32 s35, s5, 0
-; GCN1-NEXT: v_mov_b32_e32 v0, s34
-; GCN1-NEXT: v_mov_b32_e32 v1, s35
-; GCN1-NEXT: global_load_dword v0, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[36:37], 0
-; GCN1-NEXT: .LBB7_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: v_mov_b32_e32 v3, s34
-; GCN1-NEXT: v_mov_b32_e32 v1, s6
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_mov_b32_e32 v2, v0
-; GCN1-NEXT: v_mov_b32_e32 v4, s35
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2
-; GCN1-NEXT: s_or_b64 s[36:37], vcc, s[36:37]
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[36:37]
-; GCN1-NEXT: s_cbranch_execnz .LBB7_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[36:37]
-; GCN1-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN2-LABEL: global_atomic_xchg_f64_ret_offset_scalar:
-; GCN2: ; %bb.0:
-; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: s_add_u32 s34, s4, 16
-; GCN2-NEXT: s_addc_u32 s35, s5, 0
-; GCN2-NEXT: v_mov_b32_e32 v0, s34
-; GCN2-NEXT: v_mov_b32_e32 v1, s35
-; GCN2-NEXT: global_load_dword v0, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[36:37], 0
-; GCN2-NEXT: .LBB7_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: v_mov_b32_e32 v3, s34
-; GCN2-NEXT: v_mov_b32_e32 v1, s6
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_mov_b32_e32 v2, v0
-; GCN2-NEXT: v_mov_b32_e32 v4, s35
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] glc
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2
-; GCN2-NEXT: s_or_b64 s[36:37], vcc, s[36:37]
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[36:37]
-; GCN2-NEXT: s_cbranch_execnz .LBB7_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[36:37]
-; GCN2-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN3-LABEL: global_atomic_xchg_f64_ret_offset_scalar:
-; GCN3: ; %bb.0:
-; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v0, s4
-; GCN3-NEXT: v_mov_b32_e32 v1, s5
-; GCN3-NEXT: global_load_dword v0, v[0:1] offset:16
-; GCN3-NEXT: s_mov_b64 s[34:35], 0
-; GCN3-NEXT: .LBB7_1: ; %atomicrmw.start
-; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN3-NEXT: v_mov_b32_e32 v3, s4
-; GCN3-NEXT: v_mov_b32_e32 v1, s6
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: v_mov_b32_e32 v2, v0
-; GCN3-NEXT: v_mov_b32_e32 v4, s5
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: global_atomic_cmpswap v0, v[3:4], v[1:2] offset:16 glc
-; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT: buffer_wbinvl1_vol
-; GCN3-NEXT: v_cmp_eq_u32_e32 vcc, v0, v2
-; GCN3-NEXT: s_or_b64 s[34:35], vcc, s[34:35]
-; GCN3-NEXT: s_andn2_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_cbranch_execnz .LBB7_1
-; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
-; GCN3-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: global_atomic_xchg_f64_ret_offset_scalar:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll b/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll
new file mode 100644
index 000000000000..e050240f0de1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll
@@ -0,0 +1,146 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+; Check that we perform binary arithmetic in a narrower type where possible, via
+; combineBinOpOfZExt or otherwise.
+
+define <vscale x 8 x i32> @add(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
+; CHECK-LABEL: add:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwaddu.vv v12, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v12
+; CHECK-NEXT: ret
+ %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
+ %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
+ %add = add <vscale x 8 x i32> %a.zext, %b.zext
+ ret <vscale x 8 x i32> %add
+}
+
+define <vscale x 8 x i32> @sub(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
+; CHECK-LABEL: sub:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwsubu.vv v12, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsext.vf2 v8, v12
+; CHECK-NEXT: ret
+ %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
+ %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
+ %sub = sub <vscale x 8 x i32> %a.zext, %b.zext
+ ret <vscale x 8 x i32> %sub
+}
+
+define <vscale x 8 x i32> @mul(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
+; CHECK-LABEL: mul:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vv v12, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v12
+; CHECK-NEXT: ret
+ %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
+ %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
+ %mul = mul <vscale x 8 x i32> %a.zext, %b.zext
+ ret <vscale x 8 x i32> %mul
+}
+
+define <vscale x 8 x i32> @sdiv(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
+; CHECK-LABEL: sdiv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf4 v12, v8
+; CHECK-NEXT: vzext.vf4 v16, v9
+; CHECK-NEXT: vdivu.vv v8, v12, v16
+; CHECK-NEXT: ret
+ %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
+ %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
+ %sdiv = sdiv <vscale x 8 x i32> %a.zext, %b.zext
+ ret <vscale x 8 x i32> %sdiv
+}
+
+define <vscale x 8 x i32> @udiv(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
+; CHECK-LABEL: udiv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf4 v12, v8
+; CHECK-NEXT: vzext.vf4 v16, v9
+; CHECK-NEXT: vdivu.vv v8, v12, v16
+; CHECK-NEXT: ret
+ %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
+ %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
+ %udiv = udiv <vscale x 8 x i32> %a.zext, %b.zext
+ ret <vscale x 8 x i32> %udiv
+}
+
+define <vscale x 8 x i32> @srem(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
+; CHECK-LABEL: srem:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf4 v12, v8
+; CHECK-NEXT: vzext.vf4 v16, v9
+; CHECK-NEXT: vremu.vv v8, v12, v16
+; CHECK-NEXT: ret
+ %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
+ %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
+ %srem = srem <vscale x 8 x i32> %a.zext, %b.zext
+ ret <vscale x 8 x i32> %srem
+}
+
+define <vscale x 8 x i32> @urem(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
+; CHECK-LABEL: urem:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf4 v12, v8
+; CHECK-NEXT: vzext.vf4 v16, v9
+; CHECK-NEXT: vremu.vv v8, v12, v16
+; CHECK-NEXT: ret
+ %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
+ %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
+ %urem = urem <vscale x 8 x i32> %a.zext, %b.zext
+ ret <vscale x 8 x i32> %urem
+}
+
+define <vscale x 8 x i32> @and(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
+; CHECK-LABEL: and:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vand.vv v12, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v12
+; CHECK-NEXT: ret
+ %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
+ %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
+  %and = and <vscale x 8 x i32> %a.zext, %b.zext
+  ret <vscale x 8 x i32> %and
+}
+
+define <vscale x 8 x i32> @or(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
+; CHECK-LABEL: or:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vor.vv v12, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v12
+; CHECK-NEXT: ret
+ %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
+ %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
+ %or = or <vscale x 8 x i32> %a.zext, %b.zext
+ ret <vscale x 8 x i32> %or
+}
+
+define <vscale x 8 x i32> @xor(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
+; CHECK-LABEL: xor:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vxor.vv v12, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v12
+; CHECK-NEXT: ret
+ %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
+ %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
+ %xor = xor <vscale x 8 x i32> %a.zext, %b.zext
+ ret <vscale x 8 x i32> %xor
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
new file mode 100644
index 000000000000..84936d88e187
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m -mattr=+v -O2 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O2 %s
+
+define <vscale x 1 x i32> @test_vector_std(<vscale x 1 x i32> %va) nounwind {
+; SPILL-O2-LABEL: test_vector_std:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: sub sp, sp, a0
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+ ret <vscale x 1 x i32> %va
+}
+
+define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee(<vscale x 1 x i32> %va) nounwind {
+; SPILL-O2-LABEL: test_vector_callee:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 4
+; SPILL-O2-NEXT: sub sp, sp, a0
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 13
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 13
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl2r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 4
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+ ret <vscale x 1 x i32> %va
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index a4aef577bc9a..571e2df13c26 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1187,3 +1187,19 @@ define <vscale x 2 x i32> @vmerge_larger_vl_false_becomes_tail(<vscale x 2 x i32
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %false, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
ret <vscale x 2 x i32> %b
}
+
+; Test widening pseudos with their TIED variant (passthru same as first op).
+define <vscale x 2 x i64> @vpmerge_vwsub.w_tied(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vwsub.w_tied:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: vmv2r.v v10, v8
+; CHECK-NEXT: vwsub.wv v10, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: ret
+ %vl.zext = zext i32 %vl to i64
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %passthru, <vscale x 2 x i32> %y, i64 %vl.zext)
+ %b = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %mask, <vscale x 2 x i64> %a, <vscale x 2 x i64> %passthru, i32 %vl)
+ ret <vscale x 2 x i64> %b
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir
index f28311e6563f..f9b175ed80fb 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir
+++ b/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode -run-pass arm-mve-vpt-opts %s -o - | FileCheck %s
+# RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode -run-pass arm-mve-vpt-opts -verify-machineinstrs %s -o - | FileCheck %s
---
name: vcmp_with_opposite_cond
@@ -1021,3 +1021,26 @@ body: |
%16:mqpr = MVE_VORR %15, %15, 1, %10, $noreg, undef %16
%17:mqpr = MVE_VORR %16, %16, 1, %11, $noreg, undef %17
...
+---
+name: reuse_kill_flags
+alignment: 4
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: reuse_kill_flags
+ ; CHECK: [[t2MOVi:%[0-9]+]]:tgpreven = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vccr = COPY [[t2MOVi]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:mqpr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[MVE_VORR:%[0-9]+]]:mqpr = MVE_VORR [[DEF]], [[DEF]], 1, [[COPY]], $noreg, undef [[MVE_VORR]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:mqpr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[MVE_VORR1:%[0-9]+]]:mqpr = MVE_VORR [[DEF1]], [[DEF1]], 1, killed [[COPY]], $noreg, undef [[MVE_VORR1]]
+ ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit [[DEF1]]
+ %0:tgpreven = t2MOVi 0, 14, $noreg, $noreg
+ %1:vccr = COPY %0:tgpreven
+ %2:mqpr = IMPLICIT_DEF
+ %3:mqpr = MVE_VORR %2:mqpr, %2:mqpr, 1, killed %1, $noreg, undef %3
+ %4:vccr = COPY %0:tgpreven
+ %5:mqpr = IMPLICIT_DEF
+ %6:mqpr = MVE_VORR %5:mqpr, %5:mqpr, 1, killed %4, $noreg, undef %6
+ tBX_RET 14 /* CC::al */, $noreg, implicit %5:mqpr
+
+...
diff --git a/llvm/test/CodeGen/X86/combine-pavg.ll b/llvm/test/CodeGen/X86/combine-pavg.ll
index 9bb7fec7eeac..7a8ddf5178d3 100644
--- a/llvm/test/CodeGen/X86/combine-pavg.ll
+++ b/llvm/test/CodeGen/X86/combine-pavg.ll
@@ -80,3 +80,33 @@ define <16 x i8> @combine_pavgw_knownbits(<8 x i16> %a0, <8 x i16> %a1, <8 x i16
%trunc = trunc <16 x i16> %shuffle to <16 x i8>
ret <16 x i8> %trunc
}
+
+define <8 x i16> @combine_pavgw_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
+; SSE-LABEL: combine_pavgw_demandedelts:
+; SSE: # %bb.0:
+; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13]
+; SSE-NEXT: pavgw %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: combine_pavgw_demandedelts:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13]
+; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_pavgw_demandedelts:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT: vpavgw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+ %s0 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+ %avg = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %s0, <8 x i16> %a1)
+ %shuffle = shufflevector <8 x i16> %avg, <8 x i16> poison, <8 x i32> zeroinitializer
+ ret <8 x i16> %shuffle
+}
+
diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll
index e3e1cdcd7f56..022b25a24153 100644
--- a/llvm/test/CodeGen/X86/extractelement-load.ll
+++ b/llvm/test/CodeGen/X86/extractelement-load.ll
@@ -10,20 +10,13 @@ define i32 @t(ptr %val) nounwind {
; X86-SSE2-LABEL: t:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3]
-; X86-SSE2-NEXT: movd %xmm0, %eax
+; X86-SSE2-NEXT: movl 8(%eax), %eax
; X86-SSE2-NEXT: retl
;
-; X64-SSSE3-LABEL: t:
-; X64-SSSE3: # %bb.0:
-; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3]
-; X64-SSSE3-NEXT: movd %xmm0, %eax
-; X64-SSSE3-NEXT: retq
-;
-; X64-AVX-LABEL: t:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: movl 8(%rdi), %eax
-; X64-AVX-NEXT: retq
+; X64-LABEL: t:
+; X64: # %bb.0:
+; X64-NEXT: movl 8(%rdi), %eax
+; X64-NEXT: retq
%tmp2 = load <2 x i64>, ptr %val, align 16 ; <<2 x i64>> [#uses=1]
%tmp3 = bitcast <2 x i64> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp4 = extractelement <4 x i32> %tmp3, i32 2 ; <i32> [#uses=1]
@@ -83,11 +76,9 @@ bb:
define i64 @t4(ptr %a) {
; X86-SSE2-LABEL: t4:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqa (%eax), %xmm0
-; X86-SSE2-NEXT: movd %xmm0, %eax
-; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-SSE2-NEXT: movd %xmm0, %edx
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movl (%ecx), %eax
+; X86-SSE2-NEXT: movl 4(%ecx), %edx
; X86-SSE2-NEXT: retl
;
; X64-LABEL: t4:
@@ -286,35 +277,25 @@ entry:
define i32 @PR85419(ptr %p0) {
; X86-SSE2-LABEL: PR85419:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movdqa (%eax), %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; X86-SSE2-NEXT: movd %xmm1, %ecx
-; X86-SSE2-NEXT: xorl %edx, %edx
-; X86-SSE2-NEXT: orl (%eax), %ecx
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; X86-SSE2-NEXT: movd %xmm0, %eax
-; X86-SSE2-NEXT: cmovel %edx, %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movl (%ecx), %edx
+; X86-SSE2-NEXT: xorl %eax, %eax
+; X86-SSE2-NEXT: orl 4(%ecx), %edx
+; X86-SSE2-NEXT: je .LBB8_2
+; X86-SSE2-NEXT: # %bb.1:
+; X86-SSE2-NEXT: movl 8(%ecx), %eax
+; X86-SSE2-NEXT: .LBB8_2:
; X86-SSE2-NEXT: retl
;
-; X64-SSSE3-LABEL: PR85419:
-; X64-SSSE3: # %bb.0:
-; X64-SSSE3-NEXT: xorl %ecx, %ecx
-; X64-SSSE3-NEXT: cmpq $0, (%rdi)
-; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3]
-; X64-SSSE3-NEXT: movd %xmm0, %eax
-; X64-SSSE3-NEXT: cmovel %ecx, %eax
-; X64-SSSE3-NEXT: retq
-;
-; X64-AVX-LABEL: PR85419:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: xorl %eax, %eax
-; X64-AVX-NEXT: cmpq $0, (%rdi)
-; X64-AVX-NEXT: je .LBB8_2
-; X64-AVX-NEXT: # %bb.1:
-; X64-AVX-NEXT: movl 8(%rdi), %eax
-; X64-AVX-NEXT: .LBB8_2:
-; X64-AVX-NEXT: retq
+; X64-LABEL: PR85419:
+; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpq $0, (%rdi)
+; X64-NEXT: je .LBB8_2
+; X64-NEXT: # %bb.1:
+; X64-NEXT: movl 8(%rdi), %eax
+; X64-NEXT: .LBB8_2:
+; X64-NEXT: retq
%load = load <2 x i64>, ptr %p0, align 16
%vecext.i = extractelement <2 x i64> %load, i64 0
%cmp = icmp eq i64 %vecext.i, 0
@@ -443,35 +424,35 @@ define i32 @main() nounwind {
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: andl $-32, %esp
; X86-SSE2-NEXT: subl $64, %esp
-; X86-SSE2-NEXT: movdqa zero, %xmm0
-; X86-SSE2-NEXT: movaps n1+16, %xmm1
-; X86-SSE2-NEXT: movaps n1, %xmm2
-; X86-SSE2-NEXT: movaps %xmm2, zero
-; X86-SSE2-NEXT: movaps %xmm1, zero+16
-; X86-SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,2,2,2]
-; X86-SSE2-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: movaps %xmm1, (%esp)
-; X86-SSE2-NEXT: movdqa (%esp), %xmm1
-; X86-SSE2-NEXT: movaps {{[0-9]+}}(%esp), %xmm2
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
-; X86-SSE2-NEXT: movd %xmm2, %eax
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
-; X86-SSE2-NEXT: movd %xmm2, %ecx
+; X86-SSE2-NEXT: movaps n1+16, %xmm0
+; X86-SSE2-NEXT: movaps n1, %xmm1
+; X86-SSE2-NEXT: movl zero+4, %ecx
+; X86-SSE2-NEXT: movl zero+8, %eax
+; X86-SSE2-NEXT: movaps %xmm1, zero
+; X86-SSE2-NEXT: movaps %xmm0, zero+16
+; X86-SSE2-NEXT: movaps {{.*#+}} xmm0 = [2,2,2,2]
+; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movaps %xmm0, (%esp)
+; X86-SSE2-NEXT: movdqa (%esp), %xmm0
+; X86-SSE2-NEXT: movaps {{[0-9]+}}(%esp), %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; X86-SSE2-NEXT: movd %xmm1, %esi
; X86-SSE2-NEXT: xorl %edx, %edx
-; X86-SSE2-NEXT: divl %ecx
-; X86-SSE2-NEXT: movl %eax, %ecx
+; X86-SSE2-NEXT: divl %esi
+; X86-SSE2-NEXT: movl %eax, %esi
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-SSE2-NEXT: movd %xmm0, %eax
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
-; X86-SSE2-NEXT: movd %xmm0, %esi
+; X86-SSE2-NEXT: movd %xmm0, %edi
+; X86-SSE2-NEXT: movl %ecx, %eax
; X86-SSE2-NEXT: xorl %edx, %edx
-; X86-SSE2-NEXT: divl %esi
-; X86-SSE2-NEXT: addl %ecx, %eax
-; X86-SSE2-NEXT: leal -4(%ebp), %esp
+; X86-SSE2-NEXT: divl %edi
+; X86-SSE2-NEXT: addl %esi, %eax
+; X86-SSE2-NEXT: leal -8(%ebp), %esp
; X86-SSE2-NEXT: popl %esi
+; X86-SSE2-NEXT: popl %edi
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
@@ -481,31 +462,29 @@ define i32 @main() nounwind {
; X64-SSSE3-NEXT: movq %rsp, %rbp
; X64-SSSE3-NEXT: andq $-32, %rsp
; X64-SSSE3-NEXT: subq $64, %rsp
-; X64-SSSE3-NEXT: movdqa zero(%rip), %xmm0
; X64-SSSE3-NEXT: movq n1@GOTPCREL(%rip), %rax
-; X64-SSSE3-NEXT: movaps (%rax), %xmm1
-; X64-SSSE3-NEXT: movaps 16(%rax), %xmm2
-; X64-SSSE3-NEXT: movaps %xmm1, zero(%rip)
-; X64-SSSE3-NEXT: movaps %xmm2, zero+16(%rip)
-; X64-SSSE3-NEXT: movaps {{.*#+}} xmm1 = [2,2,2,2]
-; X64-SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
-; X64-SSSE3-NEXT: movaps %xmm1, (%rsp)
-; X64-SSSE3-NEXT: movdqa (%rsp), %xmm1
-; X64-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
-; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
-; X64-SSSE3-NEXT: movd %xmm2, %eax
-; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
-; X64-SSSE3-NEXT: movd %xmm2, %ecx
+; X64-SSSE3-NEXT: movaps (%rax), %xmm0
+; X64-SSSE3-NEXT: movaps 16(%rax), %xmm1
+; X64-SSSE3-NEXT: movl zero+4(%rip), %ecx
+; X64-SSSE3-NEXT: movl zero+8(%rip), %eax
+; X64-SSSE3-NEXT: movaps %xmm0, zero(%rip)
+; X64-SSSE3-NEXT: movaps %xmm1, zero+16(%rip)
+; X64-SSSE3-NEXT: movaps {{.*#+}} xmm0 = [2,2,2,2]
+; X64-SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; X64-SSSE3-NEXT: movaps %xmm0, (%rsp)
+; X64-SSSE3-NEXT: movdqa (%rsp), %xmm0
+; X64-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; X64-SSSE3-NEXT: movd %xmm1, %esi
; X64-SSSE3-NEXT: xorl %edx, %edx
-; X64-SSSE3-NEXT: divl %ecx
-; X64-SSSE3-NEXT: movl %eax, %ecx
+; X64-SSSE3-NEXT: divl %esi
+; X64-SSSE3-NEXT: movl %eax, %esi
; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X64-SSSE3-NEXT: movd %xmm0, %eax
-; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
-; X64-SSSE3-NEXT: movd %xmm0, %esi
+; X64-SSSE3-NEXT: movd %xmm0, %edi
+; X64-SSSE3-NEXT: movl %ecx, %eax
; X64-SSSE3-NEXT: xorl %edx, %edx
-; X64-SSSE3-NEXT: divl %esi
-; X64-SSSE3-NEXT: addl %ecx, %eax
+; X64-SSSE3-NEXT: divl %edi
+; X64-SSSE3-NEXT: addl %esi, %eax
; X64-SSSE3-NEXT: movq %rbp, %rsp
; X64-SSSE3-NEXT: popq %rbp
; X64-SSSE3-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/huge-stack-offset.ll b/llvm/test/CodeGen/X86/huge-stack-offset.ll
index 68dcfa748b0c..e825328ccd89 100644
--- a/llvm/test/CodeGen/X86/huge-stack-offset.ll
+++ b/llvm/test/CodeGen/X86/huge-stack-offset.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux-unknown | FileCheck %s --check-prefix=CHECK-64
-; RUN: llc < %s -mtriple=i386-linux-unknown | FileCheck %s --check-prefix=CHECK-32
+; RUN: llc < %s -mtriple=x86_64-linux-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -mtriple=i386-linux-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-32
; Test that a large stack offset uses a single add/sub instruction to
; adjust the stack pointer.
diff --git a/llvm/test/CodeGen/X86/huge-stack-offset2.ll b/llvm/test/CodeGen/X86/huge-stack-offset2.ll
index 3bf0260cc12a..053643eb3686 100644
--- a/llvm/test/CodeGen/X86/huge-stack-offset2.ll
+++ b/llvm/test/CodeGen/X86/huge-stack-offset2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
; Test how we handle pathologically large stack frames when RAX is live through
; the prologue and epilogue.
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index 898b34e969b1..6aa0a81c9020 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -12,7 +12,7 @@
; vXf64
;
-define void @store_v1f64_v1i64(<1 x i64> %trigger, ptr %addr, <1 x double> %val) {
+define void @store_v1f64_v1i64(<1 x i64> %trigger, ptr %addr, <1 x double> %val) nounwind {
; SSE-LABEL: store_v1f64_v1i64:
; SSE: ## %bb.0:
; SSE-NEXT: testq %rdi, %rdi
@@ -46,7 +46,7 @@ define void @store_v1f64_v1i64(<1 x i64> %trigger, ptr %addr, <1 x double> %val)
ret void
}
-define void @store_v2f64_v2i64(<2 x i64> %trigger, ptr %addr, <2 x double> %val) {
+define void @store_v2f64_v2i64(<2 x i64> %trigger, ptr %addr, <2 x double> %val) nounwind {
; SSE-LABEL: store_v2f64_v2i64:
; SSE: ## %bb.0:
; SSE-NEXT: movmskpd %xmm0, %eax
@@ -106,7 +106,7 @@ define void @store_v2f64_v2i64(<2 x i64> %trigger, ptr %addr, <2 x double> %val)
ret void
}
-define void @store_v4f64_v4i64(<4 x i64> %trigger, ptr %addr, <4 x double> %val) {
+define void @store_v4f64_v4i64(<4 x i64> %trigger, ptr %addr, <4 x double> %val) nounwind {
; SSE2-LABEL: store_v4f64_v4i64:
; SSE2: ## %bb.0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
@@ -222,7 +222,7 @@ define void @store_v4f64_v4i64(<4 x i64> %trigger, ptr %addr, <4 x double> %val)
; vXf32
;
-define void @store_v2f32_v2i32(<2 x i32> %trigger, ptr %addr, <2 x float> %val) {
+define void @store_v2f32_v2i32(<2 x i32> %trigger, ptr %addr, <2 x float> %val) nounwind {
; SSE2-LABEL: store_v2f32_v2i32:
; SSE2: ## %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
@@ -314,7 +314,7 @@ define void @store_v2f32_v2i32(<2 x i32> %trigger, ptr %addr, <2 x float> %val)
ret void
}
-define void @store_v4f32_v4i32(<4 x float> %x, ptr %ptr, <4 x float> %y, <4 x i32> %mask) {
+define void @store_v4f32_v4i32(<4 x float> %x, ptr %ptr, <4 x float> %y, <4 x i32> %mask) nounwind {
; SSE2-LABEL: store_v4f32_v4i32:
; SSE2: ## %bb.0:
; SSE2-NEXT: movmskps %xmm2, %eax
@@ -425,7 +425,7 @@ define void @store_v4f32_v4i32(<4 x float> %x, ptr %ptr, <4 x float> %y, <4 x i3
ret void
}
-define void @store_v8f32_v8i32(<8 x float> %x, ptr %ptr, <8 x float> %y, <8 x i32> %mask) {
+define void @store_v8f32_v8i32(<8 x float> %x, ptr %ptr, <8 x float> %y, <8 x i32> %mask) nounwind {
; SSE2-LABEL: store_v8f32_v8i32:
; SSE2: ## %bb.0:
; SSE2-NEXT: packssdw %xmm5, %xmm4
@@ -605,7 +605,7 @@ define void @store_v8f32_v8i32(<8 x float> %x, ptr %ptr, <8 x float> %y, <8 x i3
ret void
}
-define void @store_v16f32_v16i32(<16 x float> %x, ptr %ptr, <16 x float> %y, <16 x i32> %mask) {
+define void @store_v16f32_v16i32(<16 x float> %x, ptr %ptr, <16 x float> %y, <16 x i32> %mask) nounwind {
; SSE2-LABEL: store_v16f32_v16i32:
; SSE2: ## %bb.0:
; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm4
@@ -914,7 +914,7 @@ define void @store_v16f32_v16i32(<16 x float> %x, ptr %ptr, <16 x float> %y, <16
; vXi64
;
-define void @store_v2i64_v2i64(<2 x i64> %trigger, ptr %addr, <2 x i64> %val) {
+define void @store_v2i64_v2i64(<2 x i64> %trigger, ptr %addr, <2 x i64> %val) nounwind {
; SSE2-LABEL: store_v2i64_v2i64:
; SSE2: ## %bb.0:
; SSE2-NEXT: movmskpd %xmm0, %eax
@@ -998,7 +998,7 @@ define void @store_v2i64_v2i64(<2 x i64> %trigger, ptr %addr, <2 x i64> %val) {
ret void
}
-define void @store_v4i64_v4i64(<4 x i64> %trigger, ptr %addr, <4 x i64> %val) {
+define void @store_v4i64_v4i64(<4 x i64> %trigger, ptr %addr, <4 x i64> %val) nounwind {
; SSE2-LABEL: store_v4i64_v4i64:
; SSE2: ## %bb.0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
@@ -1122,7 +1122,7 @@ define void @store_v4i64_v4i64(<4 x i64> %trigger, ptr %addr, <4 x i64> %val) {
; vXi32
;
-define void @store_v1i32_v1i32(<1 x i32> %trigger, ptr %addr, <1 x i32> %val) {
+define void @store_v1i32_v1i32(<1 x i32> %trigger, ptr %addr, <1 x i32> %val) nounwind {
; SSE-LABEL: store_v1i32_v1i32:
; SSE: ## %bb.0:
; SSE-NEXT: testl %edi, %edi
@@ -1156,7 +1156,7 @@ define void @store_v1i32_v1i32(<1 x i32> %trigger, ptr %addr, <1 x i32> %val) {
ret void
}
-define void @store_v2i32_v2i32(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) {
+define void @store_v2i32_v2i32(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) nounwind {
; SSE2-LABEL: store_v2i32_v2i32:
; SSE2: ## %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
@@ -1256,7 +1256,7 @@ define void @store_v2i32_v2i32(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) {
ret void
}
-define void @store_v4i32_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) {
+define void @store_v4i32_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) nounwind {
; SSE2-LABEL: store_v4i32_v4i32:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
@@ -1370,7 +1370,7 @@ define void @store_v4i32_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) {
ret void
}
-define void @store_v8i32_v8i32(<8 x i32> %trigger, ptr %addr, <8 x i32> %val) {
+define void @store_v8i32_v8i32(<8 x i32> %trigger, ptr %addr, <8 x i32> %val) nounwind {
; SSE2-LABEL: store_v8i32_v8i32:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor %xmm4, %xmm4
@@ -1560,7 +1560,7 @@ define void @store_v8i32_v8i32(<8 x i32> %trigger, ptr %addr, <8 x i32> %val) {
; vXi16
;
-define void @store_v8i16_v8i16(<8 x i16> %trigger, ptr %addr, <8 x i16> %val) {
+define void @store_v8i16_v8i16(<8 x i16> %trigger, ptr %addr, <8 x i16> %val) nounwind {
; SSE2-LABEL: store_v8i16_v8i16:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
@@ -1907,7 +1907,7 @@ define void @store_v8i16_v8i16(<8 x i16> %trigger, ptr %addr, <8 x i16> %val) {
ret void
}
-define void @store_v16i16_v16i16(<16 x i16> %trigger, ptr %addr, <16 x i16> %val) {
+define void @store_v16i16_v16i16(<16 x i16> %trigger, ptr %addr, <16 x i16> %val) nounwind {
; SSE2-LABEL: store_v16i16_v16i16:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor %xmm4, %xmm4
@@ -2676,7 +2676,7 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, ptr %addr, <16 x i16> %val
; vXi8
;
-define void @store_v16i8_v16i8(<16 x i8> %trigger, ptr %addr, <16 x i8> %val) {
+define void @store_v16i8_v16i8(<16 x i8> %trigger, ptr %addr, <16 x i8> %val) nounwind {
; SSE2-LABEL: store_v16i8_v16i8:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
@@ -3273,7 +3273,7 @@ define void @store_v16i8_v16i8(<16 x i8> %trigger, ptr %addr, <16 x i8> %val) {
ret void
}
-define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) {
+define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) nounwind {
; SSE2-LABEL: store_v32i8_v32i8:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor %xmm4, %xmm4
@@ -4670,7 +4670,7 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) {
;;; Stores with Constant Masks
-define void @mstore_constmask_v4i32_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) {
+define void @mstore_constmask_v4i32_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) nounwind {
; SSE-LABEL: mstore_constmask_v4i32_v4i32:
; SSE: ## %bb.0:
; SSE-NEXT: movups %xmm1, (%rdi)
@@ -4693,7 +4693,7 @@ define void @mstore_constmask_v4i32_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i3
; Make sure we are able to detect an all-ones constant mask after type legalization
; to avoid masked stores.
-define void @mstore_constmask_allones_split(<16 x i64> %trigger, ptr %addr, <16 x i64> %val) {
+define void @mstore_constmask_allones_split(<16 x i64> %trigger, ptr %addr, <16 x i64> %val) nounwind {
; SSE2-LABEL: mstore_constmask_allones_split:
; SSE2: ## %bb.0:
; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm0
@@ -4810,7 +4810,7 @@ define void @mstore_constmask_allones_split(<16 x i64> %trigger, ptr %addr, <16
; When only one element of the mask is set, reduce to a scalar store.
-define void @one_mask_bit_set1(ptr %addr, <4 x i32> %val) {
+define void @one_mask_bit_set1(ptr %addr, <4 x i32> %val) nounwind {
; SSE-LABEL: one_mask_bit_set1:
; SSE: ## %bb.0:
; SSE-NEXT: movss %xmm0, (%rdi)
@@ -4832,7 +4832,7 @@ define void @one_mask_bit_set1(ptr %addr, <4 x i32> %val) {
; Choose a different element to show that the correct address offset is produced.
-define void @one_mask_bit_set2(ptr %addr, <4 x float> %val) {
+define void @one_mask_bit_set2(ptr %addr, <4 x float> %val) nounwind {
; SSE2-LABEL: one_mask_bit_set2:
; SSE2: ## %bb.0:
; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
@@ -4860,7 +4860,7 @@ define void @one_mask_bit_set2(ptr %addr, <4 x float> %val) {
; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly.
-define void @one_mask_bit_set3(ptr %addr, <4 x i64> %val) {
+define void @one_mask_bit_set3(ptr %addr, <4 x i64> %val) nounwind {
; SSE-LABEL: one_mask_bit_set3:
; SSE: ## %bb.0:
; SSE-NEXT: movlps %xmm1, 16(%rdi)
@@ -4886,7 +4886,7 @@ define void @one_mask_bit_set3(ptr %addr, <4 x i64> %val) {
; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly.
-define void @one_mask_bit_set4(ptr %addr, <4 x double> %val) {
+define void @one_mask_bit_set4(ptr %addr, <4 x double> %val) nounwind {
; SSE-LABEL: one_mask_bit_set4:
; SSE: ## %bb.0:
; SSE-NEXT: movhps %xmm1, 24(%rdi)
@@ -4912,7 +4912,7 @@ define void @one_mask_bit_set4(ptr %addr, <4 x double> %val) {
; Try a 512-bit vector to make sure AVX doesn't die and AVX512 works as expected.
-define void @one_mask_bit_set5(ptr %addr, <8 x double> %val) {
+define void @one_mask_bit_set5(ptr %addr, <8 x double> %val) nounwind {
; SSE-LABEL: one_mask_bit_set5:
; SSE: ## %bb.0:
; SSE-NEXT: movlps %xmm3, 48(%rdi)
@@ -4944,7 +4944,7 @@ define void @one_mask_bit_set5(ptr %addr, <8 x double> %val) {
}
; Try one element in each half of a vector that needs to be split
-define void @one_mask_bit_set6(ptr %addr, <16 x i64> %val) {
+define void @one_mask_bit_set6(ptr %addr, <16 x i64> %val) nounwind {
; SSE2-LABEL: one_mask_bit_set6:
; SSE2: ## %bb.0:
; SSE2-NEXT: movlps %xmm3, 48(%rdi)
@@ -4999,7 +4999,7 @@ define void @one_mask_bit_set6(ptr %addr, <16 x i64> %val) {
ret void
}
-define void @top_bits_unset_stack() {
+define void @top_bits_unset_stack() nounwind {
; SSE-LABEL: top_bits_unset_stack:
; SSE: ## %bb.0: ## %entry
; SSE-NEXT: xorps %xmm0, %xmm0
@@ -5047,7 +5047,6 @@ define void @top_bits_unset_stack() {
; X86-AVX512-LABEL: top_bits_unset_stack:
; X86-AVX512: ## %bb.0: ## %entry
; X86-AVX512-NEXT: subl $76, %esp
-; X86-AVX512-NEXT: .cfi_def_cfa_offset 80
; X86-AVX512-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; X86-AVX512-NEXT: movb $63, %al
; X86-AVX512-NEXT: kmovd %eax, %k1
@@ -5064,7 +5063,7 @@ entry:
; SimplifyDemandedBits eliminates an ashr here.
-define void @masked_store_bool_mask_demand_trunc_sext(<4 x double> %x, ptr %p, <4 x i32> %masksrc) {
+define void @masked_store_bool_mask_demand_trunc_sext(<4 x double> %x, ptr %p, <4 x i32> %masksrc) nounwind {
; SSE-LABEL: masked_store_bool_mask_demand_trunc_sext:
; SSE: ## %bb.0:
; SSE-NEXT: pslld $31, %xmm2
@@ -5160,7 +5159,7 @@ define void @masked_store_bool_mask_demand_trunc_sext(<4 x double> %x, ptr %p, <
; PR26697
-define void @one_mask_bit_set1_variable(ptr %addr, <4 x float> %val, <4 x i32> %mask) {
+define void @one_mask_bit_set1_variable(ptr %addr, <4 x float> %val, <4 x i32> %mask) nounwind {
; SSE2-LABEL: one_mask_bit_set1_variable:
; SSE2: ## %bb.0:
; SSE2-NEXT: movmskps %xmm1, %eax
@@ -5267,7 +5266,7 @@ define void @one_mask_bit_set1_variable(ptr %addr, <4 x float> %val, <4 x i32> %
; This needs to be widened to v4i32.
; This used to assert in type legalization. PR38436
; FIXME: The codegen for AVX512 should use KSHIFT to zero the upper bits of the mask.
-define void @widen_masked_store(<3 x i32> %v, ptr %p, <3 x i1> %mask) {
+define void @widen_masked_store(<3 x i32> %v, ptr %p, <3 x i1> %mask) nounwind {
; SSE2-LABEL: widen_masked_store:
; SSE2: ## %bb.0:
; SSE2-NEXT: andb $1, %sil
@@ -5448,7 +5447,7 @@ define void @widen_masked_store(<3 x i32> %v, ptr %p, <3 x i1> %mask) {
ret void
}
-define void @zero_mask(ptr %addr, <2 x double> %val) {
+define void @zero_mask(ptr %addr, <2 x double> %val) nounwind {
; SSE-LABEL: zero_mask:
; SSE: ## %bb.0:
; SSE-NEXT: retq
@@ -5464,7 +5463,7 @@ define void @zero_mask(ptr %addr, <2 x double> %val) {
ret void
}
-define void @PR11210(<4 x float> %x, ptr %ptr, <4 x float> %y, <2 x i64> %mask) {
+define void @PR11210(<4 x float> %x, ptr %ptr, <4 x float> %y, <2 x i64> %mask) nounwind {
; SSE2-LABEL: PR11210:
; SSE2: ## %bb.0:
; SSE2-NEXT: movmskps %xmm2, %eax
@@ -5638,492 +5637,248 @@ define void @PR11210(<4 x float> %x, ptr %ptr, <4 x float> %y, <2 x i64> %mask)
ret void
}
-define void @store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts(ptr %trigger.ptr, ptr %val.ptr, ptr %dst) {
-; SSE2-LABEL: store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts:
-; SSE2: ## %bb.0:
-; SSE2-NEXT: movdqa (%rdi), %xmm6
-; SSE2-NEXT: movdqa 32(%rdi), %xmm7
-; SSE2-NEXT: movdqa 64(%rdi), %xmm8
-; SSE2-NEXT: movl 80(%rsi), %eax
-; SSE2-NEXT: movl 64(%rsi), %r8d
-; SSE2-NEXT: movl 48(%rsi), %r9d
-; SSE2-NEXT: movl 32(%rsi), %r10d
-; SSE2-NEXT: movl 16(%rsi), %r11d
-; SSE2-NEXT: movdqa 80(%rsi), %xmm0
-; SSE2-NEXT: movdqa 64(%rsi), %xmm1
-; SSE2-NEXT: movdqa 48(%rsi), %xmm2
-; SSE2-NEXT: movdqa 32(%rsi), %xmm3
-; SSE2-NEXT: movdqa 16(%rsi), %xmm4
-; SSE2-NEXT: movdqa (%rsi), %xmm5
-; SSE2-NEXT: packssdw 48(%rdi), %xmm7
-; SSE2-NEXT: packssdw 16(%rdi), %xmm6
-; SSE2-NEXT: packsswb %xmm7, %xmm6
-; SSE2-NEXT: packssdw 80(%rdi), %xmm8
-; SSE2-NEXT: packsswb %xmm8, %xmm8
-; SSE2-NEXT: pmovmskb %xmm6, %edi
-; SSE2-NEXT: andl $21845, %edi ## imm = 0x5555
-; SSE2-NEXT: pmovmskb %xmm8, %ecx
-; SSE2-NEXT: andl $85, %ecx
-; SSE2-NEXT: shll $16, %ecx
-; SSE2-NEXT: orl %edi, %ecx
-; SSE2-NEXT: testb $1, %cl
-; SSE2-NEXT: jne LBB31_1
-; SSE2-NEXT: ## %bb.2: ## %else
-; SSE2-NEXT: testb $2, %cl
-; SSE2-NEXT: jne LBB31_3
-; SSE2-NEXT: LBB31_4: ## %else2
-; SSE2-NEXT: testb $4, %cl
-; SSE2-NEXT: jne LBB31_5
-; SSE2-NEXT: LBB31_6: ## %else4
-; SSE2-NEXT: testb $8, %cl
-; SSE2-NEXT: jne LBB31_7
-; SSE2-NEXT: LBB31_8: ## %else6
-; SSE2-NEXT: testb $16, %cl
-; SSE2-NEXT: jne LBB31_9
-; SSE2-NEXT: LBB31_10: ## %else8
-; SSE2-NEXT: testb $32, %cl
-; SSE2-NEXT: jne LBB31_11
-; SSE2-NEXT: LBB31_12: ## %else10
-; SSE2-NEXT: testb $64, %cl
-; SSE2-NEXT: jne LBB31_13
-; SSE2-NEXT: LBB31_14: ## %else12
-; SSE2-NEXT: testb %cl, %cl
-; SSE2-NEXT: js LBB31_15
-; SSE2-NEXT: LBB31_16: ## %else14
-; SSE2-NEXT: testl $256, %ecx ## imm = 0x100
-; SSE2-NEXT: jne LBB31_17
-; SSE2-NEXT: LBB31_18: ## %else16
-; SSE2-NEXT: testl $512, %ecx ## imm = 0x200
-; SSE2-NEXT: jne LBB31_19
-; SSE2-NEXT: LBB31_20: ## %else18
-; SSE2-NEXT: testl $1024, %ecx ## imm = 0x400
-; SSE2-NEXT: jne LBB31_21
-; SSE2-NEXT: LBB31_22: ## %else20
-; SSE2-NEXT: testl $2048, %ecx ## imm = 0x800
-; SSE2-NEXT: jne LBB31_23
-; SSE2-NEXT: LBB31_24: ## %else22
-; SSE2-NEXT: testl $4096, %ecx ## imm = 0x1000
-; SSE2-NEXT: jne LBB31_25
-; SSE2-NEXT: LBB31_26: ## %else24
-; SSE2-NEXT: testl $8192, %ecx ## imm = 0x2000
-; SSE2-NEXT: jne LBB31_27
-; SSE2-NEXT: LBB31_28: ## %else26
-; SSE2-NEXT: testl $16384, %ecx ## imm = 0x4000
-; SSE2-NEXT: jne LBB31_29
-; SSE2-NEXT: LBB31_30: ## %else28
-; SSE2-NEXT: testw %cx, %cx
-; SSE2-NEXT: js LBB31_31
-; SSE2-NEXT: LBB31_32: ## %else30
-; SSE2-NEXT: testl $65536, %ecx ## imm = 0x10000
-; SSE2-NEXT: jne LBB31_33
-; SSE2-NEXT: LBB31_34: ## %else32
-; SSE2-NEXT: testl $131072, %ecx ## imm = 0x20000
-; SSE2-NEXT: jne LBB31_35
-; SSE2-NEXT: LBB31_36: ## %else34
-; SSE2-NEXT: testl $262144, %ecx ## imm = 0x40000
-; SSE2-NEXT: jne LBB31_37
-; SSE2-NEXT: LBB31_38: ## %else36
-; SSE2-NEXT: testl $524288, %ecx ## imm = 0x80000
-; SSE2-NEXT: jne LBB31_39
-; SSE2-NEXT: LBB31_40: ## %else38
-; SSE2-NEXT: testl $1048576, %ecx ## imm = 0x100000
-; SSE2-NEXT: jne LBB31_41
-; SSE2-NEXT: LBB31_42: ## %else40
-; SSE2-NEXT: testl $2097152, %ecx ## imm = 0x200000
-; SSE2-NEXT: jne LBB31_43
-; SSE2-NEXT: LBB31_44: ## %else42
-; SSE2-NEXT: testl $4194304, %ecx ## imm = 0x400000
-; SSE2-NEXT: je LBB31_46
-; SSE2-NEXT: LBB31_45: ## %cond.store43
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: movl %eax, 88(%rdx)
-; SSE2-NEXT: LBB31_46: ## %else44
-; SSE2-NEXT: movb $1, %al
-; SSE2-NEXT: testb %al, %al
-; SSE2-NEXT: jne LBB31_48
-; SSE2-NEXT: ## %bb.47: ## %cond.store45
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: movl %eax, 92(%rdx)
-; SSE2-NEXT: LBB31_48: ## %else46
-; SSE2-NEXT: retq
-; SSE2-NEXT: LBB31_1: ## %cond.store
-; SSE2-NEXT: movl (%rsi), %esi
-; SSE2-NEXT: movl %esi, (%rdx)
-; SSE2-NEXT: testb $2, %cl
-; SSE2-NEXT: je LBB31_4
-; SSE2-NEXT: LBB31_3: ## %cond.store1
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,1,1]
-; SSE2-NEXT: movd %xmm6, %esi
-; SSE2-NEXT: movl %esi, 4(%rdx)
-; SSE2-NEXT: testb $4, %cl
-; SSE2-NEXT: je LBB31_6
-; SSE2-NEXT: LBB31_5: ## %cond.store3
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[2,3,2,3]
-; SSE2-NEXT: movd %xmm6, %esi
-; SSE2-NEXT: movl %esi, 8(%rdx)
-; SSE2-NEXT: testb $8, %cl
-; SSE2-NEXT: je LBB31_8
-; SSE2-NEXT: LBB31_7: ## %cond.store5
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[3,3,3,3]
-; SSE2-NEXT: movd %xmm5, %esi
-; SSE2-NEXT: movl %esi, 12(%rdx)
-; SSE2-NEXT: testb $16, %cl
-; SSE2-NEXT: je LBB31_10
-; SSE2-NEXT: LBB31_9: ## %cond.store7
-; SSE2-NEXT: movl %r11d, 16(%rdx)
-; SSE2-NEXT: testb $32, %cl
-; SSE2-NEXT: je LBB31_12
-; SSE2-NEXT: LBB31_11: ## %cond.store9
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,1,1]
-; SSE2-NEXT: movd %xmm5, %esi
-; SSE2-NEXT: movl %esi, 20(%rdx)
-; SSE2-NEXT: testb $64, %cl
-; SSE2-NEXT: je LBB31_14
-; SSE2-NEXT: LBB31_13: ## %cond.store11
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
-; SSE2-NEXT: movd %xmm5, %esi
-; SSE2-NEXT: movl %esi, 24(%rdx)
-; SSE2-NEXT: testb %cl, %cl
-; SSE2-NEXT: jns LBB31_16
-; SSE2-NEXT: LBB31_15: ## %cond.store13
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[3,3,3,3]
-; SSE2-NEXT: movd %xmm4, %esi
-; SSE2-NEXT: movl %esi, 28(%rdx)
-; SSE2-NEXT: testl $256, %ecx ## imm = 0x100
-; SSE2-NEXT: je LBB31_18
-; SSE2-NEXT: LBB31_17: ## %cond.store15
-; SSE2-NEXT: movl %r10d, 32(%rdx)
-; SSE2-NEXT: testl $512, %ecx ## imm = 0x200
-; SSE2-NEXT: je LBB31_20
-; SSE2-NEXT: LBB31_19: ## %cond.store17
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,1,1]
-; SSE2-NEXT: movd %xmm4, %esi
-; SSE2-NEXT: movl %esi, 36(%rdx)
-; SSE2-NEXT: testl $1024, %ecx ## imm = 0x400
-; SSE2-NEXT: je LBB31_22
-; SSE2-NEXT: LBB31_21: ## %cond.store19
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
-; SSE2-NEXT: movd %xmm4, %esi
-; SSE2-NEXT: movl %esi, 40(%rdx)
-; SSE2-NEXT: testl $2048, %ecx ## imm = 0x800
-; SSE2-NEXT: je LBB31_24
-; SSE2-NEXT: LBB31_23: ## %cond.store21
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
-; SSE2-NEXT: movd %xmm3, %esi
-; SSE2-NEXT: movl %esi, 44(%rdx)
-; SSE2-NEXT: testl $4096, %ecx ## imm = 0x1000
-; SSE2-NEXT: je LBB31_26
-; SSE2-NEXT: LBB31_25: ## %cond.store23
-; SSE2-NEXT: movl %r9d, 48(%rdx)
-; SSE2-NEXT: testl $8192, %ecx ## imm = 0x2000
-; SSE2-NEXT: je LBB31_28
-; SSE2-NEXT: LBB31_27: ## %cond.store25
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,1,1]
-; SSE2-NEXT: movd %xmm3, %esi
-; SSE2-NEXT: movl %esi, 52(%rdx)
-; SSE2-NEXT: testl $16384, %ecx ## imm = 0x4000
-; SSE2-NEXT: je LBB31_30
-; SSE2-NEXT: LBB31_29: ## %cond.store27
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
-; SSE2-NEXT: movd %xmm3, %esi
-; SSE2-NEXT: movl %esi, 56(%rdx)
-; SSE2-NEXT: testw %cx, %cx
-; SSE2-NEXT: jns LBB31_32
-; SSE2-NEXT: LBB31_31: ## %cond.store29
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
-; SSE2-NEXT: movd %xmm2, %esi
-; SSE2-NEXT: movl %esi, 60(%rdx)
-; SSE2-NEXT: testl $65536, %ecx ## imm = 0x10000
-; SSE2-NEXT: je LBB31_34
-; SSE2-NEXT: LBB31_33: ## %cond.store31
-; SSE2-NEXT: movl %r8d, 64(%rdx)
-; SSE2-NEXT: testl $131072, %ecx ## imm = 0x20000
-; SSE2-NEXT: je LBB31_36
-; SSE2-NEXT: LBB31_35: ## %cond.store33
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
-; SSE2-NEXT: movd %xmm2, %esi
-; SSE2-NEXT: movl %esi, 68(%rdx)
-; SSE2-NEXT: testl $262144, %ecx ## imm = 0x40000
-; SSE2-NEXT: je LBB31_38
-; SSE2-NEXT: LBB31_37: ## %cond.store35
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
-; SSE2-NEXT: movd %xmm2, %esi
-; SSE2-NEXT: movl %esi, 72(%rdx)
-; SSE2-NEXT: testl $524288, %ecx ## imm = 0x80000
-; SSE2-NEXT: je LBB31_40
-; SSE2-NEXT: LBB31_39: ## %cond.store37
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
-; SSE2-NEXT: movd %xmm1, %esi
-; SSE2-NEXT: movl %esi, 76(%rdx)
-; SSE2-NEXT: testl $1048576, %ecx ## imm = 0x100000
-; SSE2-NEXT: je LBB31_42
-; SSE2-NEXT: LBB31_41: ## %cond.store39
-; SSE2-NEXT: movl %eax, 80(%rdx)
-; SSE2-NEXT: testl $2097152, %ecx ## imm = 0x200000
-; SSE2-NEXT: je LBB31_44
-; SSE2-NEXT: LBB31_43: ## %cond.store41
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: movl %eax, 84(%rdx)
-; SSE2-NEXT: testl $4194304, %ecx ## imm = 0x400000
-; SSE2-NEXT: jne LBB31_45
-; SSE2-NEXT: jmp LBB31_46
-;
-; SSE4-LABEL: store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts:
-; SSE4: ## %bb.0:
-; SSE4-NEXT: pushq %rbp
-; SSE4-NEXT: .cfi_def_cfa_offset 16
-; SSE4-NEXT: pushq %r15
-; SSE4-NEXT: .cfi_def_cfa_offset 24
-; SSE4-NEXT: pushq %r14
-; SSE4-NEXT: .cfi_def_cfa_offset 32
-; SSE4-NEXT: pushq %r13
-; SSE4-NEXT: .cfi_def_cfa_offset 40
-; SSE4-NEXT: pushq %r12
-; SSE4-NEXT: .cfi_def_cfa_offset 48
-; SSE4-NEXT: pushq %rbx
-; SSE4-NEXT: .cfi_def_cfa_offset 56
-; SSE4-NEXT: .cfi_offset %rbx, -56
-; SSE4-NEXT: .cfi_offset %r12, -48
-; SSE4-NEXT: .cfi_offset %r13, -40
-; SSE4-NEXT: .cfi_offset %r14, -32
-; SSE4-NEXT: .cfi_offset %r15, -24
-; SSE4-NEXT: .cfi_offset %rbp, -16
-; SSE4-NEXT: movdqa (%rdi), %xmm1
-; SSE4-NEXT: movdqa 32(%rdi), %xmm2
-; SSE4-NEXT: movdqa 64(%rdi), %xmm0
-; SSE4-NEXT: movl 92(%rsi), %eax
-; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; SSE4-NEXT: movl 88(%rsi), %eax
-; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; SSE4-NEXT: movl 84(%rsi), %eax
-; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; SSE4-NEXT: movl 80(%rsi), %eax
-; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; SSE4-NEXT: movl 76(%rsi), %eax
-; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; SSE4-NEXT: movl 72(%rsi), %eax
-; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; SSE4-NEXT: movl 68(%rsi), %eax
-; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; SSE4-NEXT: movl 64(%rsi), %eax
-; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; SSE4-NEXT: movl 60(%rsi), %eax
-; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; SSE4-NEXT: movl 56(%rsi), %eax
-; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; SSE4-NEXT: movl 52(%rsi), %eax
-; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; SSE4-NEXT: packssdw 48(%rdi), %xmm2
-; SSE4-NEXT: packssdw 16(%rdi), %xmm1
-; SSE4-NEXT: packsswb %xmm2, %xmm1
-; SSE4-NEXT: packssdw 80(%rdi), %xmm0
-; SSE4-NEXT: packsswb %xmm0, %xmm0
-; SSE4-NEXT: pmovmskb %xmm1, %eax
-; SSE4-NEXT: andl $21845, %eax ## imm = 0x5555
-; SSE4-NEXT: pmovmskb %xmm0, %edi
-; SSE4-NEXT: andl $85, %edi
-; SSE4-NEXT: shll $16, %edi
-; SSE4-NEXT: orl %eax, %edi
-; SSE4-NEXT: movl 48(%rsi), %r13d
-; SSE4-NEXT: testb $1, %dil
-; SSE4-NEXT: movl 44(%rsi), %eax
-; SSE4-NEXT: movl 40(%rsi), %ecx
-; SSE4-NEXT: movl 36(%rsi), %r8d
-; SSE4-NEXT: movl 32(%rsi), %r9d
-; SSE4-NEXT: movl 28(%rsi), %r10d
-; SSE4-NEXT: movl 24(%rsi), %r11d
-; SSE4-NEXT: movl 20(%rsi), %ebx
-; SSE4-NEXT: movl 16(%rsi), %ebp
-; SSE4-NEXT: movl 12(%rsi), %r14d
-; SSE4-NEXT: movl 8(%rsi), %r15d
-; SSE4-NEXT: movl 4(%rsi), %r12d
-; SSE4-NEXT: jne LBB31_1
-; SSE4-NEXT: ## %bb.2: ## %else
-; SSE4-NEXT: testb $2, %dil
-; SSE4-NEXT: jne LBB31_3
-; SSE4-NEXT: LBB31_4: ## %else2
-; SSE4-NEXT: testb $4, %dil
-; SSE4-NEXT: jne LBB31_5
-; SSE4-NEXT: LBB31_6: ## %else4
-; SSE4-NEXT: testb $8, %dil
-; SSE4-NEXT: jne LBB31_7
-; SSE4-NEXT: LBB31_8: ## %else6
-; SSE4-NEXT: testb $16, %dil
-; SSE4-NEXT: jne LBB31_9
-; SSE4-NEXT: LBB31_10: ## %else8
-; SSE4-NEXT: testb $32, %dil
-; SSE4-NEXT: jne LBB31_11
-; SSE4-NEXT: LBB31_12: ## %else10
-; SSE4-NEXT: testb $64, %dil
-; SSE4-NEXT: jne LBB31_13
-; SSE4-NEXT: LBB31_14: ## %else12
-; SSE4-NEXT: testb %dil, %dil
-; SSE4-NEXT: js LBB31_15
-; SSE4-NEXT: LBB31_16: ## %else14
-; SSE4-NEXT: testl $256, %edi ## imm = 0x100
-; SSE4-NEXT: jne LBB31_17
-; SSE4-NEXT: LBB31_18: ## %else16
-; SSE4-NEXT: testl $512, %edi ## imm = 0x200
-; SSE4-NEXT: jne LBB31_19
-; SSE4-NEXT: LBB31_20: ## %else18
-; SSE4-NEXT: testl $1024, %edi ## imm = 0x400
-; SSE4-NEXT: jne LBB31_21
-; SSE4-NEXT: LBB31_22: ## %else20
-; SSE4-NEXT: testl $2048, %edi ## imm = 0x800
-; SSE4-NEXT: jne LBB31_23
-; SSE4-NEXT: LBB31_24: ## %else22
-; SSE4-NEXT: testl $4096, %edi ## imm = 0x1000
-; SSE4-NEXT: jne LBB31_25
-; SSE4-NEXT: LBB31_26: ## %else24
-; SSE4-NEXT: testl $8192, %edi ## imm = 0x2000
-; SSE4-NEXT: jne LBB31_27
-; SSE4-NEXT: LBB31_28: ## %else26
-; SSE4-NEXT: testl $16384, %edi ## imm = 0x4000
-; SSE4-NEXT: jne LBB31_29
-; SSE4-NEXT: LBB31_30: ## %else28
-; SSE4-NEXT: testw %di, %di
-; SSE4-NEXT: js LBB31_31
-; SSE4-NEXT: LBB31_32: ## %else30
-; SSE4-NEXT: testl $65536, %edi ## imm = 0x10000
-; SSE4-NEXT: jne LBB31_33
-; SSE4-NEXT: LBB31_34: ## %else32
-; SSE4-NEXT: testl $131072, %edi ## imm = 0x20000
-; SSE4-NEXT: jne LBB31_35
-; SSE4-NEXT: LBB31_36: ## %else34
-; SSE4-NEXT: testl $262144, %edi ## imm = 0x40000
-; SSE4-NEXT: jne LBB31_37
-; SSE4-NEXT: LBB31_38: ## %else36
-; SSE4-NEXT: testl $524288, %edi ## imm = 0x80000
-; SSE4-NEXT: jne LBB31_39
-; SSE4-NEXT: LBB31_40: ## %else38
-; SSE4-NEXT: testl $1048576, %edi ## imm = 0x100000
-; SSE4-NEXT: jne LBB31_41
-; SSE4-NEXT: LBB31_42: ## %else40
-; SSE4-NEXT: testl $2097152, %edi ## imm = 0x200000
-; SSE4-NEXT: jne LBB31_43
-; SSE4-NEXT: LBB31_44: ## %else42
-; SSE4-NEXT: testl $4194304, %edi ## imm = 0x400000
-; SSE4-NEXT: je LBB31_46
-; SSE4-NEXT: LBB31_45: ## %cond.store43
-; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
-; SSE4-NEXT: movl %eax, 88(%rdx)
-; SSE4-NEXT: LBB31_46: ## %else44
-; SSE4-NEXT: movb $1, %al
-; SSE4-NEXT: testb %al, %al
-; SSE4-NEXT: jne LBB31_48
-; SSE4-NEXT: ## %bb.47: ## %cond.store45
-; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
-; SSE4-NEXT: movl %eax, 92(%rdx)
-; SSE4-NEXT: LBB31_48: ## %else46
-; SSE4-NEXT: popq %rbx
-; SSE4-NEXT: popq %r12
-; SSE4-NEXT: popq %r13
-; SSE4-NEXT: popq %r14
-; SSE4-NEXT: popq %r15
-; SSE4-NEXT: popq %rbp
-; SSE4-NEXT: retq
-; SSE4-NEXT: LBB31_1: ## %cond.store
-; SSE4-NEXT: movl (%rsi), %esi
-; SSE4-NEXT: movl %esi, (%rdx)
-; SSE4-NEXT: testb $2, %dil
-; SSE4-NEXT: je LBB31_4
-; SSE4-NEXT: LBB31_3: ## %cond.store1
-; SSE4-NEXT: movl %r12d, 4(%rdx)
-; SSE4-NEXT: testb $4, %dil
-; SSE4-NEXT: je LBB31_6
-; SSE4-NEXT: LBB31_5: ## %cond.store3
-; SSE4-NEXT: movl %r15d, 8(%rdx)
-; SSE4-NEXT: testb $8, %dil
-; SSE4-NEXT: je LBB31_8
-; SSE4-NEXT: LBB31_7: ## %cond.store5
-; SSE4-NEXT: movl %r14d, 12(%rdx)
-; SSE4-NEXT: testb $16, %dil
-; SSE4-NEXT: je LBB31_10
-; SSE4-NEXT: LBB31_9: ## %cond.store7
-; SSE4-NEXT: movl %ebp, 16(%rdx)
-; SSE4-NEXT: testb $32, %dil
-; SSE4-NEXT: je LBB31_12
-; SSE4-NEXT: LBB31_11: ## %cond.store9
-; SSE4-NEXT: movl %ebx, 20(%rdx)
-; SSE4-NEXT: testb $64, %dil
-; SSE4-NEXT: je LBB31_14
-; SSE4-NEXT: LBB31_13: ## %cond.store11
-; SSE4-NEXT: movl %r11d, 24(%rdx)
-; SSE4-NEXT: testb %dil, %dil
-; SSE4-NEXT: jns LBB31_16
-; SSE4-NEXT: LBB31_15: ## %cond.store13
-; SSE4-NEXT: movl %r10d, 28(%rdx)
-; SSE4-NEXT: testl $256, %edi ## imm = 0x100
-; SSE4-NEXT: je LBB31_18
-; SSE4-NEXT: LBB31_17: ## %cond.store15
-; SSE4-NEXT: movl %r9d, 32(%rdx)
-; SSE4-NEXT: testl $512, %edi ## imm = 0x200
-; SSE4-NEXT: je LBB31_20
-; SSE4-NEXT: LBB31_19: ## %cond.store17
-; SSE4-NEXT: movl %r8d, 36(%rdx)
-; SSE4-NEXT: testl $1024, %edi ## imm = 0x400
-; SSE4-NEXT: je LBB31_22
-; SSE4-NEXT: LBB31_21: ## %cond.store19
-; SSE4-NEXT: movl %ecx, 40(%rdx)
-; SSE4-NEXT: testl $2048, %edi ## imm = 0x800
-; SSE4-NEXT: je LBB31_24
-; SSE4-NEXT: LBB31_23: ## %cond.store21
-; SSE4-NEXT: movl %eax, 44(%rdx)
-; SSE4-NEXT: testl $4096, %edi ## imm = 0x1000
-; SSE4-NEXT: je LBB31_26
-; SSE4-NEXT: LBB31_25: ## %cond.store23
-; SSE4-NEXT: movl %r13d, 48(%rdx)
-; SSE4-NEXT: testl $8192, %edi ## imm = 0x2000
-; SSE4-NEXT: je LBB31_28
-; SSE4-NEXT: LBB31_27: ## %cond.store25
-; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
-; SSE4-NEXT: movl %eax, 52(%rdx)
-; SSE4-NEXT: testl $16384, %edi ## imm = 0x4000
-; SSE4-NEXT: je LBB31_30
-; SSE4-NEXT: LBB31_29: ## %cond.store27
-; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
-; SSE4-NEXT: movl %eax, 56(%rdx)
-; SSE4-NEXT: testw %di, %di
-; SSE4-NEXT: jns LBB31_32
-; SSE4-NEXT: LBB31_31: ## %cond.store29
-; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
-; SSE4-NEXT: movl %eax, 60(%rdx)
-; SSE4-NEXT: testl $65536, %edi ## imm = 0x10000
-; SSE4-NEXT: je LBB31_34
-; SSE4-NEXT: LBB31_33: ## %cond.store31
-; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
-; SSE4-NEXT: movl %eax, 64(%rdx)
-; SSE4-NEXT: testl $131072, %edi ## imm = 0x20000
-; SSE4-NEXT: je LBB31_36
-; SSE4-NEXT: LBB31_35: ## %cond.store33
-; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
-; SSE4-NEXT: movl %eax, 68(%rdx)
-; SSE4-NEXT: testl $262144, %edi ## imm = 0x40000
-; SSE4-NEXT: je LBB31_38
-; SSE4-NEXT: LBB31_37: ## %cond.store35
-; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
-; SSE4-NEXT: movl %eax, 72(%rdx)
-; SSE4-NEXT: testl $524288, %edi ## imm = 0x80000
-; SSE4-NEXT: je LBB31_40
-; SSE4-NEXT: LBB31_39: ## %cond.store37
-; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
-; SSE4-NEXT: movl %eax, 76(%rdx)
-; SSE4-NEXT: testl $1048576, %edi ## imm = 0x100000
-; SSE4-NEXT: je LBB31_42
-; SSE4-NEXT: LBB31_41: ## %cond.store39
-; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
-; SSE4-NEXT: movl %eax, 80(%rdx)
-; SSE4-NEXT: testl $2097152, %edi ## imm = 0x200000
-; SSE4-NEXT: je LBB31_44
-; SSE4-NEXT: LBB31_43: ## %cond.store41
-; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
-; SSE4-NEXT: movl %eax, 84(%rdx)
-; SSE4-NEXT: testl $4194304, %edi ## imm = 0x400000
-; SSE4-NEXT: jne LBB31_45
-; SSE4-NEXT: jmp LBB31_46
+define void @store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts(ptr %trigger.ptr, ptr %val.ptr, ptr %dst) nounwind {
+; SSE-LABEL: store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts:
+; SSE: ## %bb.0:
+; SSE-NEXT: pushq %rbp
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %r13
+; SSE-NEXT: pushq %r12
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: movdqa (%rdi), %xmm1
+; SSE-NEXT: movdqa 32(%rdi), %xmm2
+; SSE-NEXT: movdqa 64(%rdi), %xmm0
+; SSE-NEXT: movl 92(%rsi), %eax
+; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; SSE-NEXT: movl 88(%rsi), %eax
+; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; SSE-NEXT: movl 84(%rsi), %eax
+; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; SSE-NEXT: movl 80(%rsi), %eax
+; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; SSE-NEXT: movl 76(%rsi), %eax
+; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; SSE-NEXT: movl 72(%rsi), %eax
+; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; SSE-NEXT: movl 68(%rsi), %eax
+; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; SSE-NEXT: movl 64(%rsi), %eax
+; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; SSE-NEXT: movl 60(%rsi), %eax
+; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; SSE-NEXT: movl 56(%rsi), %eax
+; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; SSE-NEXT: movl 52(%rsi), %eax
+; SSE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; SSE-NEXT: packssdw 48(%rdi), %xmm2
+; SSE-NEXT: packssdw 16(%rdi), %xmm1
+; SSE-NEXT: packsswb %xmm2, %xmm1
+; SSE-NEXT: packssdw 80(%rdi), %xmm0
+; SSE-NEXT: packsswb %xmm0, %xmm0
+; SSE-NEXT: pmovmskb %xmm1, %eax
+; SSE-NEXT: andl $21845, %eax ## imm = 0x5555
+; SSE-NEXT: pmovmskb %xmm0, %edi
+; SSE-NEXT: andl $85, %edi
+; SSE-NEXT: shll $16, %edi
+; SSE-NEXT: orl %eax, %edi
+; SSE-NEXT: movl 48(%rsi), %r13d
+; SSE-NEXT: testb $1, %dil
+; SSE-NEXT: movl 44(%rsi), %eax
+; SSE-NEXT: movl 40(%rsi), %ecx
+; SSE-NEXT: movl 36(%rsi), %r8d
+; SSE-NEXT: movl 32(%rsi), %r9d
+; SSE-NEXT: movl 28(%rsi), %r10d
+; SSE-NEXT: movl 24(%rsi), %r11d
+; SSE-NEXT: movl 20(%rsi), %ebx
+; SSE-NEXT: movl 16(%rsi), %ebp
+; SSE-NEXT: movl 12(%rsi), %r14d
+; SSE-NEXT: movl 8(%rsi), %r15d
+; SSE-NEXT: movl 4(%rsi), %r12d
+; SSE-NEXT: jne LBB31_1
+; SSE-NEXT: ## %bb.2: ## %else
+; SSE-NEXT: testb $2, %dil
+; SSE-NEXT: jne LBB31_3
+; SSE-NEXT: LBB31_4: ## %else2
+; SSE-NEXT: testb $4, %dil
+; SSE-NEXT: jne LBB31_5
+; SSE-NEXT: LBB31_6: ## %else4
+; SSE-NEXT: testb $8, %dil
+; SSE-NEXT: jne LBB31_7
+; SSE-NEXT: LBB31_8: ## %else6
+; SSE-NEXT: testb $16, %dil
+; SSE-NEXT: jne LBB31_9
+; SSE-NEXT: LBB31_10: ## %else8
+; SSE-NEXT: testb $32, %dil
+; SSE-NEXT: jne LBB31_11
+; SSE-NEXT: LBB31_12: ## %else10
+; SSE-NEXT: testb $64, %dil
+; SSE-NEXT: jne LBB31_13
+; SSE-NEXT: LBB31_14: ## %else12
+; SSE-NEXT: testb %dil, %dil
+; SSE-NEXT: js LBB31_15
+; SSE-NEXT: LBB31_16: ## %else14
+; SSE-NEXT: testl $256, %edi ## imm = 0x100
+; SSE-NEXT: jne LBB31_17
+; SSE-NEXT: LBB31_18: ## %else16
+; SSE-NEXT: testl $512, %edi ## imm = 0x200
+; SSE-NEXT: jne LBB31_19
+; SSE-NEXT: LBB31_20: ## %else18
+; SSE-NEXT: testl $1024, %edi ## imm = 0x400
+; SSE-NEXT: jne LBB31_21
+; SSE-NEXT: LBB31_22: ## %else20
+; SSE-NEXT: testl $2048, %edi ## imm = 0x800
+; SSE-NEXT: jne LBB31_23
+; SSE-NEXT: LBB31_24: ## %else22
+; SSE-NEXT: testl $4096, %edi ## imm = 0x1000
+; SSE-NEXT: jne LBB31_25
+; SSE-NEXT: LBB31_26: ## %else24
+; SSE-NEXT: testl $8192, %edi ## imm = 0x2000
+; SSE-NEXT: jne LBB31_27
+; SSE-NEXT: LBB31_28: ## %else26
+; SSE-NEXT: testl $16384, %edi ## imm = 0x4000
+; SSE-NEXT: jne LBB31_29
+; SSE-NEXT: LBB31_30: ## %else28
+; SSE-NEXT: testw %di, %di
+; SSE-NEXT: js LBB31_31
+; SSE-NEXT: LBB31_32: ## %else30
+; SSE-NEXT: testl $65536, %edi ## imm = 0x10000
+; SSE-NEXT: jne LBB31_33
+; SSE-NEXT: LBB31_34: ## %else32
+; SSE-NEXT: testl $131072, %edi ## imm = 0x20000
+; SSE-NEXT: jne LBB31_35
+; SSE-NEXT: LBB31_36: ## %else34
+; SSE-NEXT: testl $262144, %edi ## imm = 0x40000
+; SSE-NEXT: jne LBB31_37
+; SSE-NEXT: LBB31_38: ## %else36
+; SSE-NEXT: testl $524288, %edi ## imm = 0x80000
+; SSE-NEXT: jne LBB31_39
+; SSE-NEXT: LBB31_40: ## %else38
+; SSE-NEXT: testl $1048576, %edi ## imm = 0x100000
+; SSE-NEXT: jne LBB31_41
+; SSE-NEXT: LBB31_42: ## %else40
+; SSE-NEXT: testl $2097152, %edi ## imm = 0x200000
+; SSE-NEXT: jne LBB31_43
+; SSE-NEXT: LBB31_44: ## %else42
+; SSE-NEXT: testl $4194304, %edi ## imm = 0x400000
+; SSE-NEXT: je LBB31_46
+; SSE-NEXT: LBB31_45: ## %cond.store43
+; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
+; SSE-NEXT: movl %eax, 88(%rdx)
+; SSE-NEXT: LBB31_46: ## %else44
+; SSE-NEXT: movb $1, %al
+; SSE-NEXT: testb %al, %al
+; SSE-NEXT: jne LBB31_48
+; SSE-NEXT: ## %bb.47: ## %cond.store45
+; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
+; SSE-NEXT: movl %eax, 92(%rdx)
+; SSE-NEXT: LBB31_48: ## %else46
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r12
+; SSE-NEXT: popq %r13
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: popq %rbp
+; SSE-NEXT: retq
+; SSE-NEXT: LBB31_1: ## %cond.store
+; SSE-NEXT: movl (%rsi), %esi
+; SSE-NEXT: movl %esi, (%rdx)
+; SSE-NEXT: testb $2, %dil
+; SSE-NEXT: je LBB31_4
+; SSE-NEXT: LBB31_3: ## %cond.store1
+; SSE-NEXT: movl %r12d, 4(%rdx)
+; SSE-NEXT: testb $4, %dil
+; SSE-NEXT: je LBB31_6
+; SSE-NEXT: LBB31_5: ## %cond.store3
+; SSE-NEXT: movl %r15d, 8(%rdx)
+; SSE-NEXT: testb $8, %dil
+; SSE-NEXT: je LBB31_8
+; SSE-NEXT: LBB31_7: ## %cond.store5
+; SSE-NEXT: movl %r14d, 12(%rdx)
+; SSE-NEXT: testb $16, %dil
+; SSE-NEXT: je LBB31_10
+; SSE-NEXT: LBB31_9: ## %cond.store7
+; SSE-NEXT: movl %ebp, 16(%rdx)
+; SSE-NEXT: testb $32, %dil
+; SSE-NEXT: je LBB31_12
+; SSE-NEXT: LBB31_11: ## %cond.store9
+; SSE-NEXT: movl %ebx, 20(%rdx)
+; SSE-NEXT: testb $64, %dil
+; SSE-NEXT: je LBB31_14
+; SSE-NEXT: LBB31_13: ## %cond.store11
+; SSE-NEXT: movl %r11d, 24(%rdx)
+; SSE-NEXT: testb %dil, %dil
+; SSE-NEXT: jns LBB31_16
+; SSE-NEXT: LBB31_15: ## %cond.store13
+; SSE-NEXT: movl %r10d, 28(%rdx)
+; SSE-NEXT: testl $256, %edi ## imm = 0x100
+; SSE-NEXT: je LBB31_18
+; SSE-NEXT: LBB31_17: ## %cond.store15
+; SSE-NEXT: movl %r9d, 32(%rdx)
+; SSE-NEXT: testl $512, %edi ## imm = 0x200
+; SSE-NEXT: je LBB31_20
+; SSE-NEXT: LBB31_19: ## %cond.store17
+; SSE-NEXT: movl %r8d, 36(%rdx)
+; SSE-NEXT: testl $1024, %edi ## imm = 0x400
+; SSE-NEXT: je LBB31_22
+; SSE-NEXT: LBB31_21: ## %cond.store19
+; SSE-NEXT: movl %ecx, 40(%rdx)
+; SSE-NEXT: testl $2048, %edi ## imm = 0x800
+; SSE-NEXT: je LBB31_24
+; SSE-NEXT: LBB31_23: ## %cond.store21
+; SSE-NEXT: movl %eax, 44(%rdx)
+; SSE-NEXT: testl $4096, %edi ## imm = 0x1000
+; SSE-NEXT: je LBB31_26
+; SSE-NEXT: LBB31_25: ## %cond.store23
+; SSE-NEXT: movl %r13d, 48(%rdx)
+; SSE-NEXT: testl $8192, %edi ## imm = 0x2000
+; SSE-NEXT: je LBB31_28
+; SSE-NEXT: LBB31_27: ## %cond.store25
+; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
+; SSE-NEXT: movl %eax, 52(%rdx)
+; SSE-NEXT: testl $16384, %edi ## imm = 0x4000
+; SSE-NEXT: je LBB31_30
+; SSE-NEXT: LBB31_29: ## %cond.store27
+; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
+; SSE-NEXT: movl %eax, 56(%rdx)
+; SSE-NEXT: testw %di, %di
+; SSE-NEXT: jns LBB31_32
+; SSE-NEXT: LBB31_31: ## %cond.store29
+; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
+; SSE-NEXT: movl %eax, 60(%rdx)
+; SSE-NEXT: testl $65536, %edi ## imm = 0x10000
+; SSE-NEXT: je LBB31_34
+; SSE-NEXT: LBB31_33: ## %cond.store31
+; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
+; SSE-NEXT: movl %eax, 64(%rdx)
+; SSE-NEXT: testl $131072, %edi ## imm = 0x20000
+; SSE-NEXT: je LBB31_36
+; SSE-NEXT: LBB31_35: ## %cond.store33
+; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
+; SSE-NEXT: movl %eax, 68(%rdx)
+; SSE-NEXT: testl $262144, %edi ## imm = 0x40000
+; SSE-NEXT: je LBB31_38
+; SSE-NEXT: LBB31_37: ## %cond.store35
+; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
+; SSE-NEXT: movl %eax, 72(%rdx)
+; SSE-NEXT: testl $524288, %edi ## imm = 0x80000
+; SSE-NEXT: je LBB31_40
+; SSE-NEXT: LBB31_39: ## %cond.store37
+; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
+; SSE-NEXT: movl %eax, 76(%rdx)
+; SSE-NEXT: testl $1048576, %edi ## imm = 0x100000
+; SSE-NEXT: je LBB31_42
+; SSE-NEXT: LBB31_41: ## %cond.store39
+; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
+; SSE-NEXT: movl %eax, 80(%rdx)
+; SSE-NEXT: testl $2097152, %edi ## imm = 0x200000
+; SSE-NEXT: je LBB31_44
+; SSE-NEXT: LBB31_43: ## %cond.store41
+; SSE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
+; SSE-NEXT: movl %eax, 84(%rdx)
+; SSE-NEXT: testl $4194304, %edi ## imm = 0x400000
+; SSE-NEXT: jne LBB31_45
+; SSE-NEXT: jmp LBB31_46
;
; AVX1-LABEL: store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts:
; AVX1: ## %bb.0:
@@ -6266,7 +6021,7 @@ define void @store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts(ptr %trigge
}
; From https://reviews.llvm.org/rGf8d9097168b7#1165311
-define void @undefshuffle(<8 x i1> %i0, ptr %src, ptr %dst) #0 {
+define void @undefshuffle(<8 x i1> %i0, ptr %src, ptr %dst) nounwind {
; SSE2-LABEL: undefshuffle:
; SSE2: ## %bb.0: ## %else
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/pr45378.ll b/llvm/test/CodeGen/X86/pr45378.ll
index 426f4eed662a..6a5770a4b4ad 100644
--- a/llvm/test/CodeGen/X86/pr45378.ll
+++ b/llvm/test/CodeGen/X86/pr45378.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=CHECK,AVX
declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
@@ -71,28 +71,12 @@ define i1 @parseHeaders2_scalar_or(ptr %ptr) nounwind {
}
define i1 @parseHeaders2_scalar_and(ptr %ptr) nounwind {
-; SSE2-LABEL: parseHeaders2_scalar_and:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqu (%rdi), %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; SSE2-NEXT: movq %xmm0, %rax
-; SSE2-NEXT: testq %rax, (%rdi)
-; SSE2-NEXT: sete %al
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: parseHeaders2_scalar_and:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movq (%rdi), %rax
-; SSE41-NEXT: testq %rax, 8(%rdi)
-; SSE41-NEXT: sete %al
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: parseHeaders2_scalar_and:
-; AVX: # %bb.0:
-; AVX-NEXT: movq (%rdi), %rax
-; AVX-NEXT: testq %rax, 8(%rdi)
-; AVX-NEXT: sete %al
-; AVX-NEXT: retq
+; CHECK-LABEL: parseHeaders2_scalar_and:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: testq %rax, 8(%rdi)
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
%vload = load <2 x i64>, ptr %ptr, align 8
%v1 = extractelement <2 x i64> %vload, i32 0
%v2 = extractelement <2 x i64> %vload, i32 1
diff --git a/llvm/test/CodeGen/X86/setcc-non-simple-type.ll b/llvm/test/CodeGen/X86/setcc-non-simple-type.ll
index 2187c653f76c..97c3c2040b29 100644
--- a/llvm/test/CodeGen/X86/setcc-non-simple-type.ll
+++ b/llvm/test/CodeGen/X86/setcc-non-simple-type.ll
@@ -60,36 +60,30 @@ define void @failing(ptr %0, ptr %1) nounwind {
; CHECK-NEXT: .LBB0_2: # %vector.body
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: movdqu 1024(%rdx,%rdi), %xmm5
-; CHECK-NEXT: movdqu 1040(%rdx,%rdi), %xmm6
-; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
-; CHECK-NEXT: movq %xmm5, %r8
-; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm6[2,3,2,3]
-; CHECK-NEXT: movq %xmm5, %r9
-; CHECK-NEXT: cmpq 1040(%rdx,%rdi), %rsi
-; CHECK-NEXT: movq %rcx, %r10
-; CHECK-NEXT: sbbq %r9, %r10
-; CHECK-NEXT: setge %r9b
-; CHECK-NEXT: movzbl %r9b, %r9d
-; CHECK-NEXT: andl $1, %r9d
-; CHECK-NEXT: negq %r9
-; CHECK-NEXT: movq %r9, %xmm5
; CHECK-NEXT: cmpq 1024(%rdx,%rdi), %rsi
-; CHECK-NEXT: movq %rcx, %r9
-; CHECK-NEXT: sbbq %r8, %r9
+; CHECK-NEXT: movq %rcx, %r8
+; CHECK-NEXT: sbbq 1032(%rdx,%rdi), %r8
+; CHECK-NEXT: setge %r8b
+; CHECK-NEXT: movzbl %r8b, %r8d
+; CHECK-NEXT: andl $1, %r8d
+; CHECK-NEXT: negq %r8
+; CHECK-NEXT: movq %r8, %xmm5
+; CHECK-NEXT: cmpq 1040(%rdx,%rdi), %rsi
+; CHECK-NEXT: movq %rcx, %r8
+; CHECK-NEXT: sbbq 1048(%rdx,%rdi), %r8
; CHECK-NEXT: setge %r8b
; CHECK-NEXT: movzbl %r8b, %r8d
; CHECK-NEXT: andl $1, %r8d
; CHECK-NEXT: negq %r8
; CHECK-NEXT: movq %r8, %xmm6
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm5[0]
-; CHECK-NEXT: movdqa %xmm1, %xmm5
-; CHECK-NEXT: psllq %xmm4, %xmm5
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
+; CHECK-NEXT: movdqa %xmm1, %xmm6
+; CHECK-NEXT: psllq %xmm4, %xmm6
; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,2,3]
; CHECK-NEXT: movdqa %xmm1, %xmm8
; CHECK-NEXT: psllq %xmm7, %xmm8
-; CHECK-NEXT: movsd {{.*#+}} xmm8 = xmm5[0],xmm8[1]
-; CHECK-NEXT: andpd %xmm6, %xmm8
+; CHECK-NEXT: movsd {{.*#+}} xmm8 = xmm6[0],xmm8[1]
+; CHECK-NEXT: andpd %xmm5, %xmm8
; CHECK-NEXT: orpd %xmm8, %xmm3
; CHECK-NEXT: paddq %xmm2, %xmm4
; CHECK-NEXT: addq $32, %rdi
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll
index 2610f4322c8e..62051d170994 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -1983,91 +1983,75 @@ define void @PR34947(ptr %p0, ptr %p1) nounwind {
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movzwl 16(%eax), %edx
; X86-SSE-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-SSE-NEXT: movdqa (%eax), %xmm3
-; X86-SSE-NEXT: movdqa (%ecx), %xmm0
-; X86-SSE-NEXT: movdqa 16(%ecx), %xmm1
-; X86-SSE-NEXT: pxor %xmm5, %xmm5
-; X86-SSE-NEXT: movdqa %xmm3, %xmm2
-; X86-SSE-NEXT: pextrw $7, %xmm3, %eax
-; X86-SSE-NEXT: pextrw $4, %xmm3, %edi
-; X86-SSE-NEXT: pextrw $0, %xmm3, %ebp
-; X86-SSE-NEXT: pextrw $1, %xmm3, %esi
-; X86-SSE-NEXT: pextrw $3, %xmm3, %ebx
-; X86-SSE-NEXT: movdqa %xmm3, %xmm4
-; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
-; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3]
-; X86-SSE-NEXT: movd %xmm3, %ecx
+; X86-SSE-NEXT: movdqa (%eax), %xmm2
+; X86-SSE-NEXT: pxor %xmm1, %xmm1
+; X86-SSE-NEXT: movdqa %xmm2, %xmm0
+; X86-SSE-NEXT: pextrw $7, %xmm2, %eax
+; X86-SSE-NEXT: pextrw $4, %xmm2, %esi
+; X86-SSE-NEXT: pextrw $1, %xmm2, %edi
+; X86-SSE-NEXT: pextrw $0, %xmm2, %ebx
+; X86-SSE-NEXT: pextrw $3, %xmm2, %ebp
+; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X86-SSE-NEXT: xorl %edx, %edx
+; X86-SSE-NEXT: divl 28(%ecx)
+; X86-SSE-NEXT: movd %edx, %xmm1
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
+; X86-SSE-NEXT: movd %xmm3, %eax
; X86-SSE-NEXT: xorl %edx, %edx
-; X86-SSE-NEXT: divl %ecx
+; X86-SSE-NEXT: divl 24(%ecx)
; X86-SSE-NEXT: movd %edx, %xmm3
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
-; X86-SSE-NEXT: movd %xmm5, %eax
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,2,3]
-; X86-SSE-NEXT: movd %xmm5, %ecx
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; X86-SSE-NEXT: movl %esi, %eax
; X86-SSE-NEXT: xorl %edx, %edx
-; X86-SSE-NEXT: divl %ecx
-; X86-SSE-NEXT: movd %edx, %xmm5
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; X86-SSE-NEXT: divl 16(%ecx)
+; X86-SSE-NEXT: movd %edx, %xmm1
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE-NEXT: movd %xmm0, %eax
+; X86-SSE-NEXT: xorl %edx, %edx
+; X86-SSE-NEXT: divl 20(%ecx)
+; X86-SSE-NEXT: movd %edx, %xmm0
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; X86-SSE-NEXT: movl %edi, %eax
; X86-SSE-NEXT: xorl %edx, %edx
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SSE-NEXT: divl 16(%edi)
+; X86-SSE-NEXT: divl 4(%ecx)
; X86-SSE-NEXT: movd %edx, %xmm3
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
-; X86-SSE-NEXT: movd %xmm2, %eax
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X86-SSE-NEXT: movd %xmm1, %ecx
+; X86-SSE-NEXT: movl %ebx, %eax
; X86-SSE-NEXT: xorl %edx, %edx
-; X86-SSE-NEXT: divl %ecx
-; X86-SSE-NEXT: movd %edx, %xmm1
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
+; X86-SSE-NEXT: divl (%ecx)
+; X86-SSE-NEXT: movd %edx, %xmm0
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X86-SSE-NEXT: movl %ebp, %eax
; X86-SSE-NEXT: xorl %edx, %edx
-; X86-SSE-NEXT: divl (%edi)
-; X86-SSE-NEXT: movd %edx, %xmm1
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
-; X86-SSE-NEXT: movd %xmm2, %ecx
-; X86-SSE-NEXT: movl %esi, %eax
-; X86-SSE-NEXT: xorl %edx, %edx
-; X86-SSE-NEXT: divl %ecx
-; X86-SSE-NEXT: movd %edx, %xmm2
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
-; X86-SSE-NEXT: movd %xmm2, %ecx
-; X86-SSE-NEXT: movl %ebx, %eax
+; X86-SSE-NEXT: divl 12(%ecx)
+; X86-SSE-NEXT: movd %edx, %xmm3
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; X86-SSE-NEXT: movd %xmm2, %eax
; X86-SSE-NEXT: xorl %edx, %edx
-; X86-SSE-NEXT: divl %ecx
+; X86-SSE-NEXT: divl 8(%ecx)
; X86-SSE-NEXT: movd %edx, %xmm2
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
-; X86-SSE-NEXT: movd %xmm4, %eax
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; X86-SSE-NEXT: movd %xmm0, %ecx
-; X86-SSE-NEXT: xorl %edx, %edx
-; X86-SSE-NEXT: divl %ecx
-; X86-SSE-NEXT: movd %edx, %xmm0
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X86-SSE-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-SSE-NEXT: xorl %edx, %edx
-; X86-SSE-NEXT: divl 32(%edi)
+; X86-SSE-NEXT: divl 32(%ecx)
; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm4
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,2,2,3]
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE-NEXT: pmuludq %xmm2, %xmm3
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm3
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X86-SSE-NEXT: movl %eax, (%eax)
-; X86-SSE-NEXT: movdqa %xmm3, (%eax)
+; X86-SSE-NEXT: movdqa %xmm1, (%eax)
; X86-SSE-NEXT: movdqa %xmm0, (%eax)
; X86-SSE-NEXT: addl $4, %esp
; X86-SSE-NEXT: popl %esi
@@ -2204,91 +2188,76 @@ define void @PR34947(ptr %p0, ptr %p1) nounwind {
; X64-SSE-LABEL: PR34947:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movzwl 16(%rdi), %ecx
-; X64-SSE-NEXT: movdqa (%rdi), %xmm3
-; X64-SSE-NEXT: movdqa (%rsi), %xmm0
-; X64-SSE-NEXT: movdqa 16(%rsi), %xmm1
-; X64-SSE-NEXT: pxor %xmm5, %xmm5
-; X64-SSE-NEXT: movdqa %xmm3, %xmm2
-; X64-SSE-NEXT: pextrw $7, %xmm3, %eax
-; X64-SSE-NEXT: pextrw $4, %xmm3, %r8d
-; X64-SSE-NEXT: pextrw $0, %xmm3, %r10d
-; X64-SSE-NEXT: pextrw $1, %xmm3, %edi
-; X64-SSE-NEXT: pextrw $3, %xmm3, %r9d
-; X64-SSE-NEXT: movdqa %xmm3, %xmm4
-; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
-; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3]
-; X64-SSE-NEXT: movd %xmm3, %r11d
-; X64-SSE-NEXT: xorl %edx, %edx
-; X64-SSE-NEXT: divl %r11d
-; X64-SSE-NEXT: movd %edx, %xmm3
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
-; X64-SSE-NEXT: movd %xmm5, %eax
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,2,3]
-; X64-SSE-NEXT: movd %xmm5, %r11d
+; X64-SSE-NEXT: movdqa (%rdi), %xmm2
+; X64-SSE-NEXT: pxor %xmm1, %xmm1
+; X64-SSE-NEXT: movdqa %xmm2, %xmm0
+; X64-SSE-NEXT: pextrw $7, %xmm2, %eax
+; X64-SSE-NEXT: pextrw $4, %xmm2, %edi
+; X64-SSE-NEXT: pextrw $1, %xmm2, %r8d
+; X64-SSE-NEXT: pextrw $0, %xmm2, %r9d
+; X64-SSE-NEXT: pextrw $3, %xmm2, %r10d
+; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-SSE-NEXT: xorl %edx, %edx
-; X64-SSE-NEXT: divl %r11d
-; X64-SSE-NEXT: movd %edx, %xmm5
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
-; X64-SSE-NEXT: movl %r8d, %eax
+; X64-SSE-NEXT: divl 28(%rsi)
+; X64-SSE-NEXT: movd %edx, %xmm1
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
+; X64-SSE-NEXT: movd %xmm3, %eax
; X64-SSE-NEXT: xorl %edx, %edx
-; X64-SSE-NEXT: divl 16(%rsi)
+; X64-SSE-NEXT: divl 24(%rsi)
; X64-SSE-NEXT: movd %edx, %xmm3
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
-; X64-SSE-NEXT: movd %xmm2, %eax
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X64-SSE-NEXT: movd %xmm1, %r8d
-; X64-SSE-NEXT: xorl %edx, %edx
-; X64-SSE-NEXT: divl %r8d
-; X64-SSE-NEXT: movd %edx, %xmm1
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
-; X64-SSE-NEXT: movl %r10d, %eax
+; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: xorl %edx, %edx
-; X64-SSE-NEXT: divl (%rsi)
+; X64-SSE-NEXT: divl 16(%rsi)
; X64-SSE-NEXT: movd %edx, %xmm1
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
-; X64-SSE-NEXT: movd %xmm2, %r8d
-; X64-SSE-NEXT: movl %edi, %eax
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: xorl %edx, %edx
-; X64-SSE-NEXT: divl %r8d
-; X64-SSE-NEXT: movd %edx, %xmm2
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
-; X64-SSE-NEXT: movd %xmm2, %edi
+; X64-SSE-NEXT: divl 20(%rsi)
+; X64-SSE-NEXT: movd %edx, %xmm0
+; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; X64-SSE-NEXT: movl %r8d, %eax
+; X64-SSE-NEXT: xorl %edx, %edx
+; X64-SSE-NEXT: divl 4(%rsi)
+; X64-SSE-NEXT: movd %edx, %xmm0
; X64-SSE-NEXT: movl %r9d, %eax
; X64-SSE-NEXT: xorl %edx, %edx
-; X64-SSE-NEXT: divl %edi
-; X64-SSE-NEXT: movd %edx, %xmm2
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
-; X64-SSE-NEXT: movd %xmm4, %eax
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; X64-SSE-NEXT: movd %xmm0, %edi
+; X64-SSE-NEXT: divl (%rsi)
+; X64-SSE-NEXT: movd %edx, %xmm3
+; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; X64-SSE-NEXT: movl %r10d, %eax
; X64-SSE-NEXT: xorl %edx, %edx
-; X64-SSE-NEXT: divl %edi
+; X64-SSE-NEXT: divl 12(%rsi)
; X64-SSE-NEXT: movd %edx, %xmm0
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; X64-SSE-NEXT: movd %xmm2, %eax
+; X64-SSE-NEXT: xorl %edx, %edx
+; X64-SSE-NEXT: divl 8(%rsi)
+; X64-SSE-NEXT: movd %edx, %xmm2
+; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; X64-SSE-NEXT: movl %ecx, %eax
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl 32(%rsi)
; X64-SSE-NEXT: movdqa {{.*#+}} xmm0 = [8199,8199,8199,8199]
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; X64-SSE-NEXT: pmuludq %xmm0, %xmm1
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X64-SSE-NEXT: pmuludq %xmm0, %xmm2
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X64-SSE-NEXT: pmuludq %xmm0, %xmm3
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; X64-SSE-NEXT: pmuludq %xmm0, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; X64-SSE-NEXT: pmuludq %xmm0, %xmm1
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-SSE-NEXT: pmuludq %xmm0, %xmm2
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X64-SSE-NEXT: movl %eax, (%rax)
-; X64-SSE-NEXT: movdqa %xmm3, (%rax)
; X64-SSE-NEXT: movdqa %xmm1, (%rax)
+; X64-SSE-NEXT: movdqa %xmm3, (%rax)
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: PR34947:
diff --git a/llvm/test/CodeGen/X86/stack-protector.ll b/llvm/test/CodeGen/X86/stack-protector.ll
index a277f9f862ab..f4f3ae4f55f2 100644
--- a/llvm/test/CodeGen/X86/stack-protector.ll
+++ b/llvm/test/CodeGen/X86/stack-protector.ll
@@ -1,6 +1,7 @@
; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-I386 %s
; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-X64 %s
; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-KERNEL-X64 %s
+; RUN: llc -code-model=kernel -mtriple=x86_64-unknown-freebsd < %s -o - | FileCheck --check-prefix=FREEBSD-KERNEL-X64 %s
; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | FileCheck --check-prefix=DARWIN-X64 %s
; RUN: llc -mtriple=amd64-pc-openbsd < %s -o - | FileCheck --check-prefix=OPENBSD-AMD64 %s
; RUN: llc -mtriple=i386-pc-windows-msvc < %s -o - | FileCheck -check-prefix=MSVC-I386 %s
@@ -75,6 +76,10 @@ entry:
; LINUX-X64: mov{{l|q}} %fs:
; LINUX-X64: callq __stack_chk_fail
+; FREEBSD-KERNEL-X64-LABEL: test1b:
+; FREEBSD-KERNEL-X64-NOT: mov{{l|q}} __stack_chk_guard@GOTPCREL
+; FREEBSD-KERNEL-X64: callq __stack_chk_fail
+
; LINUX-KERNEL-X64-LABEL: test1b:
; LINUX-KERNEL-X64: mov{{l|q}} %gs:
; LINUX-KERNEL-X64: callq __stack_chk_fail
@@ -118,6 +123,10 @@ entry:
; LINUX-X64: mov{{l|q}} %fs:
; LINUX-X64: callq __stack_chk_fail
+; FREEBSD-KERNEL-X64-LABEL: test1c:
+; FREEBSD-KERNEL-X64: mov{{l|q}} __stack_chk_guard(%rip)
+; FREEBSD-KERNEL-X64: callq __stack_chk_fail
+
; LINUX-KERNEL-X64-LABEL: test1c:
; LINUX-KERNEL-X64: mov{{l|q}} %gs:
; LINUX-KERNEL-X64: callq __stack_chk_fail
diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll
index 99a3821bb9ba..f2240a946844 100644
--- a/llvm/test/CodeGen/X86/var-permute-128.ll
+++ b/llvm/test/CodeGen/X86/var-permute-128.ll
@@ -1101,17 +1101,13 @@ define <16 x i8> @var_shuffle_v16i8_from_v32i8_v16i8(<32 x i8> %v, <16 x i8> %in
define void @indices_convert() {
; SSE3-LABEL: indices_convert:
; SSE3: # %bb.0: # %bb
-; SSE3-NEXT: movdqa (%rax), %xmm0
-; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; SSE3-NEXT: movd %xmm1, %eax
-; SSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSE3-NEXT: movaps (%rax), %xmm0
+; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE3-NEXT: movl (%rax), %eax
+; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE3-NEXT: andl $3, %eax
-; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
-; SSE3-NEXT: movd %xmm1, %ecx
-; SSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSE3-NEXT: andl $3, %ecx
; SSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE3-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE3-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
@@ -1120,17 +1116,13 @@ define void @indices_convert() {
;
; SSSE3-LABEL: indices_convert:
; SSSE3: # %bb.0: # %bb
-; SSSE3-NEXT: movdqa (%rax), %xmm0
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; SSSE3-NEXT: movd %xmm1, %eax
-; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT: movaps (%rax), %xmm0
+; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT: movl (%rax), %eax
+; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: andl $3, %eax
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
-; SSSE3-NEXT: movd %xmm1, %ecx
-; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSSE3-NEXT: andl $3, %ecx
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 7bbcdee9a680..e26de4be7066 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -2911,23 +2911,12 @@ define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) {
;
define <2 x double> @sitofp_load_2i64_to_2f64(ptr%a) {
-; SSE2-LABEL: sitofp_load_2i64_to_2f64:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa (%rdi), %xmm1
-; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; SSE2-NEXT: movq %xmm1, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2sd %rax, %xmm1
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: sitofp_load_2i64_to_2f64:
-; SSE41: # %bb.0:
-; SSE41-NEXT: cvtsi2sdq 8(%rdi), %xmm1
-; SSE41-NEXT: cvtsi2sdq (%rdi), %xmm0
-; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE41-NEXT: retq
+; SSE-LABEL: sitofp_load_2i64_to_2f64:
+; SSE: # %bb.0:
+; SSE-NEXT: cvtsi2sdq 8(%rdi), %xmm1
+; SSE-NEXT: cvtsi2sdq (%rdi), %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
;
; VEX-LABEL: sitofp_load_2i64_to_2f64:
; VEX: # %bb.0:
@@ -3093,35 +3082,16 @@ define <2 x double> @sitofp_load_2i8_to_2f64(ptr%a) {
}
define <4 x double> @sitofp_load_4i64_to_4f64(ptr%a) {
-; SSE2-LABEL: sitofp_load_4i64_to_4f64:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa (%rdi), %xmm1
-; SSE2-NEXT: movdqa 16(%rdi), %xmm2
-; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; SSE2-NEXT: movq %xmm1, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2sd %rax, %xmm1
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2sdq 16(%rdi), %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
-; SSE2-NEXT: movq %xmm2, %rax
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: cvtsi2sd %rax, %xmm2
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: sitofp_load_4i64_to_4f64:
-; SSE41: # %bb.0:
-; SSE41-NEXT: cvtsi2sdq 8(%rdi), %xmm1
-; SSE41-NEXT: cvtsi2sdq (%rdi), %xmm0
-; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE41-NEXT: cvtsi2sdq 24(%rdi), %xmm2
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: cvtsi2sdq 16(%rdi), %xmm1
-; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE41-NEXT: retq
+; SSE-LABEL: sitofp_load_4i64_to_4f64:
+; SSE: # %bb.0:
+; SSE-NEXT: cvtsi2sdq 8(%rdi), %xmm1
+; SSE-NEXT: cvtsi2sdq (%rdi), %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: cvtsi2sdq 24(%rdi), %xmm2
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2sdq 16(%rdi), %xmm1
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: retq
;
; VEX-LABEL: sitofp_load_4i64_to_4f64:
; VEX: # %bb.0:
@@ -3865,22 +3835,14 @@ define <4 x double> @uitofp_load_4i8_to_4f64(ptr%a) {
define <4 x float> @sitofp_load_4i64_to_4f32(ptr%a) {
; SSE2-LABEL: sitofp_load_4i64_to_4f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa (%rdi), %xmm1
-; SSE2-NEXT: movdqa 16(%rdi), %xmm0
-; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; SSE2-NEXT: movq %xmm0, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT: cvtsi2ssq 24(%rdi), %xmm0
+; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm1
+; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: cvtsi2ssq 8(%rdi), %xmm2
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; SSE2-NEXT: movq %xmm1, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_load_4i64_to_4f32:
@@ -4015,39 +3977,24 @@ define <4 x float> @sitofp_load_4i8_to_4f32(ptr%a) {
define <8 x float> @sitofp_load_8i64_to_8f32(ptr%a) {
; SSE2-LABEL: sitofp_load_8i64_to_8f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa (%rdi), %xmm1
-; SSE2-NEXT: movdqa 16(%rdi), %xmm0
-; SSE2-NEXT: movdqa 32(%rdi), %xmm2
-; SSE2-NEXT: movdqa 48(%rdi), %xmm3
-; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; SSE2-NEXT: movq %xmm0, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
+; SSE2-NEXT: cvtsi2ssq 24(%rdi), %xmm0
+; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm1
+; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: cvtsi2ssq 8(%rdi), %xmm2
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; SSE2-NEXT: movq %xmm1, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
-; SSE2-NEXT: xorps %xmm4, %xmm4
-; SSE2-NEXT: cvtsi2ssq 48(%rdi), %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
-; SSE2-NEXT: movq %xmm1, %rax
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
-; SSE2-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; SSE2-NEXT: cvtsi2ssq 56(%rdi), %xmm1
+; SSE2-NEXT: xorps %xmm2, %xmm2
+; SSE2-NEXT: cvtsi2ssq 48(%rdi), %xmm2
+; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: cvtsi2ssq 40(%rdi), %xmm3
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2ssq 32(%rdi), %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
-; SSE2-NEXT: movq %xmm2, %rax
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: cvtsi2ss %rax, %xmm2
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_load_8i64_to_8f32:
@@ -4256,70 +4203,64 @@ define <8 x float> @sitofp_load_8i8_to_8f32(ptr%a) {
define <4 x float> @uitofp_load_4i64_to_4f32(ptr%a) {
; SSE2-LABEL: uitofp_load_4i64_to_4f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa 16(%rdi), %xmm0
-; SSE2-NEXT: movq 16(%rdi), %rax
+; SSE2-NEXT: movq 24(%rdi), %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB83_1
; SSE2-NEXT: # %bb.2:
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: jmp .LBB83_3
; SSE2-NEXT: .LBB83_1:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
-; SSE2-NEXT: addss %xmm1, %xmm1
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
+; SSE2-NEXT: addss %xmm0, %xmm0
; SSE2-NEXT: .LBB83_3:
-; SSE2-NEXT: movq (%rdi), %rax
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; SSE2-NEXT: movq %xmm0, %rcx
-; SSE2-NEXT: testq %rcx, %rcx
+; SSE2-NEXT: movq 16(%rdi), %rax
+; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB83_4
; SSE2-NEXT: # %bb.5:
-; SSE2-NEXT: cvtsi2ss %rcx, %xmm2
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: jmp .LBB83_6
; SSE2-NEXT: .LBB83_4:
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: shrq %rcx
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: orq %rcx, %rax
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
+; SSE2-NEXT: addss %xmm1, %xmm1
+; SSE2-NEXT: .LBB83_6:
+; SSE2-NEXT: movq (%rdi), %rax
+; SSE2-NEXT: movq 8(%rdi), %rcx
+; SSE2-NEXT: testq %rcx, %rcx
+; SSE2-NEXT: js .LBB83_7
+; SSE2-NEXT: # %bb.8:
+; SSE2-NEXT: cvtsi2ss %rcx, %xmm2
+; SSE2-NEXT: jmp .LBB83_9
+; SSE2-NEXT: .LBB83_7:
; SSE2-NEXT: movq %rcx, %rdx
; SSE2-NEXT: shrq %rdx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: orq %rdx, %rcx
; SSE2-NEXT: cvtsi2ss %rcx, %xmm2
; SSE2-NEXT: addss %xmm2, %xmm2
-; SSE2-NEXT: .LBB83_6:
-; SSE2-NEXT: movdqa (%rdi), %xmm3
-; SSE2-NEXT: testq %rax, %rax
-; SSE2-NEXT: js .LBB83_7
-; SSE2-NEXT: # %bb.8:
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: jmp .LBB83_9
-; SSE2-NEXT: .LBB83_7:
-; SSE2-NEXT: movq %rax, %rcx
-; SSE2-NEXT: shrq %rcx
-; SSE2-NEXT: andl $1, %eax
-; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: addss %xmm0, %xmm0
; SSE2-NEXT: .LBB83_9:
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
-; SSE2-NEXT: movq %xmm2, %rax
+; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB83_10
; SSE2-NEXT: # %bb.11:
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: cvtsi2ss %rax, %xmm2
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: jmp .LBB83_12
; SSE2-NEXT: .LBB83_10:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: cvtsi2ss %rax, %xmm2
-; SSE2-NEXT: addss %xmm2, %xmm2
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
+; SSE2-NEXT: addss %xmm0, %xmm0
; SSE2-NEXT: .LBB83_12:
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -4591,8 +4532,7 @@ define <4 x float> @uitofp_load_4i8_to_4f32(ptr%a) {
define <8 x float> @uitofp_load_8i64_to_8f32(ptr%a) {
; SSE2-LABEL: uitofp_load_8i64_to_8f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa 16(%rdi), %xmm0
-; SSE2-NEXT: movq 16(%rdi), %rax
+; SSE2-NEXT: movq 24(%rdi), %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB87_1
; SSE2-NEXT: # %bb.2:
@@ -4606,127 +4546,114 @@ define <8 x float> @uitofp_load_8i64_to_8f32(ptr%a) {
; SSE2-NEXT: cvtsi2ss %rax, %xmm2
; SSE2-NEXT: addss %xmm2, %xmm2
; SSE2-NEXT: .LBB87_3:
-; SSE2-NEXT: movq (%rdi), %rax
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; SSE2-NEXT: movq %xmm0, %rcx
-; SSE2-NEXT: testq %rcx, %rcx
+; SSE2-NEXT: movq 16(%rdi), %rax
+; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB87_4
; SSE2-NEXT: # %bb.5:
-; SSE2-NEXT: cvtsi2ss %rcx, %xmm1
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: jmp .LBB87_6
; SSE2-NEXT: .LBB87_4:
-; SSE2-NEXT: movq %rcx, %rdx
-; SSE2-NEXT: shrq %rdx
-; SSE2-NEXT: andl $1, %ecx
-; SSE2-NEXT: orq %rdx, %rcx
-; SSE2-NEXT: cvtsi2ss %rcx, %xmm1
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: shrq %rcx
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: orq %rcx, %rax
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: addss %xmm1, %xmm1
; SSE2-NEXT: .LBB87_6:
-; SSE2-NEXT: movdqa (%rdi), %xmm3
-; SSE2-NEXT: testq %rax, %rax
+; SSE2-NEXT: movq (%rdi), %rax
+; SSE2-NEXT: movq 8(%rdi), %rcx
+; SSE2-NEXT: testq %rcx, %rcx
; SSE2-NEXT: js .LBB87_7
; SSE2-NEXT: # %bb.8:
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: jmp .LBB87_9
-; SSE2-NEXT: .LBB87_7:
+; SSE2-NEXT: cvtsi2ss %rcx, %xmm3
+; SSE2-NEXT: testq %rax, %rax
+; SSE2-NEXT: jns .LBB87_11
+; SSE2-NEXT: .LBB87_10:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: addss %xmm0, %xmm0
-; SSE2-NEXT: .LBB87_9:
-; SSE2-NEXT: movq 48(%rdi), %rax
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
-; SSE2-NEXT: movq %xmm3, %rcx
-; SSE2-NEXT: testq %rcx, %rcx
-; SSE2-NEXT: js .LBB87_10
-; SSE2-NEXT: # %bb.11:
-; SSE2-NEXT: cvtsi2ss %rcx, %xmm4
; SSE2-NEXT: jmp .LBB87_12
-; SSE2-NEXT: .LBB87_10:
+; SSE2-NEXT: .LBB87_7:
; SSE2-NEXT: movq %rcx, %rdx
; SSE2-NEXT: shrq %rdx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: orq %rdx, %rcx
-; SSE2-NEXT: cvtsi2ss %rcx, %xmm4
-; SSE2-NEXT: addss %xmm4, %xmm4
+; SSE2-NEXT: cvtsi2ss %rcx, %xmm3
+; SSE2-NEXT: addss %xmm3, %xmm3
+; SSE2-NEXT: testq %rax, %rax
+; SSE2-NEXT: js .LBB87_10
+; SSE2-NEXT: .LBB87_11:
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: .LBB87_12:
-; SSE2-NEXT: movdqa 48(%rdi), %xmm5
+; SSE2-NEXT: movq 56(%rdi), %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB87_13
; SSE2-NEXT: # %bb.14:
-; SSE2-NEXT: xorps %xmm3, %xmm3
-; SSE2-NEXT: cvtsi2ss %rax, %xmm3
+; SSE2-NEXT: cvtsi2ss %rax, %xmm5
; SSE2-NEXT: jmp .LBB87_15
; SSE2-NEXT: .LBB87_13:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm3, %xmm3
-; SSE2-NEXT: cvtsi2ss %rax, %xmm3
-; SSE2-NEXT: addss %xmm3, %xmm3
+; SSE2-NEXT: cvtsi2ss %rax, %xmm5
+; SSE2-NEXT: addss %xmm5, %xmm5
; SSE2-NEXT: .LBB87_15:
-; SSE2-NEXT: movq 32(%rdi), %rax
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
-; SSE2-NEXT: movq %xmm5, %rcx
-; SSE2-NEXT: testq %rcx, %rcx
+; SSE2-NEXT: movq 48(%rdi), %rax
+; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB87_16
; SSE2-NEXT: # %bb.17:
-; SSE2-NEXT: xorps %xmm5, %xmm5
-; SSE2-NEXT: cvtsi2ss %rcx, %xmm5
+; SSE2-NEXT: cvtsi2ss %rax, %xmm4
; SSE2-NEXT: jmp .LBB87_18
; SSE2-NEXT: .LBB87_16:
-; SSE2-NEXT: movq %rcx, %rdx
-; SSE2-NEXT: shrq %rdx
-; SSE2-NEXT: andl $1, %ecx
-; SSE2-NEXT: orq %rdx, %rcx
-; SSE2-NEXT: xorps %xmm5, %xmm5
-; SSE2-NEXT: cvtsi2ss %rcx, %xmm5
-; SSE2-NEXT: addss %xmm5, %xmm5
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: shrq %rcx
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: orq %rcx, %rax
+; SSE2-NEXT: cvtsi2ss %rax, %xmm4
+; SSE2-NEXT: addss %xmm4, %xmm4
; SSE2-NEXT: .LBB87_18:
-; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
-; SSE2-NEXT: movdqa 32(%rdi), %xmm4
+; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: movq 40(%rdi), %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB87_19
; SSE2-NEXT: # %bb.20:
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
+; SSE2-NEXT: xorps %xmm2, %xmm2
+; SSE2-NEXT: cvtsi2ss %rax, %xmm2
; SSE2-NEXT: jmp .LBB87_21
; SSE2-NEXT: .LBB87_19:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
-; SSE2-NEXT: addss %xmm1, %xmm1
+; SSE2-NEXT: xorps %xmm2, %xmm2
+; SSE2-NEXT: cvtsi2ss %rax, %xmm2
+; SSE2-NEXT: addss %xmm2, %xmm2
; SSE2-NEXT: .LBB87_21:
-; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,2,3]
-; SSE2-NEXT: movq %xmm2, %rax
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
+; SSE2-NEXT: movq 32(%rdi), %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB87_22
; SSE2-NEXT: # %bb.23:
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: cvtsi2ss %rax, %xmm2
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: jmp .LBB87_24
; SSE2-NEXT: .LBB87_22:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: cvtsi2ss %rax, %xmm2
-; SSE2-NEXT: addss %xmm2, %xmm2
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
+; SSE2-NEXT: addss %xmm1, %xmm1
; SSE2-NEXT: .LBB87_24:
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_load_8i64_to_8f32:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll
index 96ac4b6088c3..9133b329deb2 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll
@@ -758,7 +758,7 @@ define linkonce_odr dso_local void @_Z5test2IcEvT_iz(i8 noundef %t, i32 noundef
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -808,7 +808,7 @@ define linkonce_odr dso_local void @_Z5test2IcEvT_iz(i8 noundef %t, i32 noundef
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -851,7 +851,7 @@ define linkonce_odr dso_local void @_Z5test2IiEvT_iz(i32 noundef %t, i32 noundef
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -901,7 +901,7 @@ define linkonce_odr dso_local void @_Z5test2IiEvT_iz(i32 noundef %t, i32 noundef
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -936,7 +936,7 @@ define linkonce_odr dso_local void @_Z5test2IfEvT_iz(float noundef %t, i32 nound
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -986,7 +986,7 @@ define linkonce_odr dso_local void @_Z5test2IfEvT_iz(float noundef %t, i32 nound
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -1021,7 +1021,7 @@ define linkonce_odr dso_local void @_Z5test2IdEvT_iz(double noundef %t, i32 noun
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1071,7 +1071,7 @@ define linkonce_odr dso_local void @_Z5test2IdEvT_iz(double noundef %t, i32 noun
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -1106,7 +1106,7 @@ define linkonce_odr dso_local void @_Z5test2IeEvT_iz(fp128 noundef %t, i32 nound
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1156,7 +1156,7 @@ define linkonce_odr dso_local void @_Z5test2IeEvT_iz(fp128 noundef %t, i32 nound
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -1191,7 +1191,7 @@ define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz(i64 %t.coerce, i32 no
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1241,7 +1241,7 @@ define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz(i64 %t.coerce, i32 no
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -1276,7 +1276,7 @@ define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz([2 x i64] %t.coe
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1326,7 +1326,7 @@ define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz([2 x i64] %t.coe
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -1361,7 +1361,7 @@ define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz([2 x double] a
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1411,7 +1411,7 @@ define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz([2 x double] a
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -1446,7 +1446,7 @@ define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz([4 x double] alignst
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1496,7 +1496,7 @@ define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz([4 x double] alignst
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -1531,7 +1531,7 @@ define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz([2 x i64] %t.co
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1581,7 +1581,7 @@ define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz([2 x i64] %t.co
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -1616,7 +1616,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz([2 x fp128] ali
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1666,7 +1666,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz([2 x fp128] ali
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -1701,7 +1701,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz([4 x fp128] ali
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1751,7 +1751,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz([4 x fp128] ali
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll b/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll
index 1535fccfc211..e0b5907719af 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll
@@ -39,7 +39,7 @@ define i64 @foo(i64 %guard, ...) #1 {
; Only 56 bytes of the register save area is copied, because of
; "use-soft-float".
-; CHECK: call void @llvm.va_start(ptr %vl)
+; CHECK: call void @llvm.va_start.p0(ptr %vl)
; CHECK: [[VlAddr:%.*]] = ptrtoint ptr %vl to i64
; CHECK: [[RegSaveAreaAddrAddr:%.*]] = add i64 [[VlAddr]], 24
; CHECK: [[RegSaveAreaAddr:%.*]] = inttoptr i64 [[RegSaveAreaAddrAddr]] to ptr
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/vararg_shadow.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/vararg_shadow.ll
index aff4d2c55ad6..205101564dfe 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/vararg_shadow.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/vararg_shadow.ll
@@ -560,7 +560,7 @@ define linkonce_odr dso_local void @_Z5test2IcEvT_iz(i8 noundef signext %t, i32
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -580,7 +580,7 @@ define linkonce_odr dso_local void @_Z5test2IcEvT_iz(i8 noundef signext %t, i32
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -623,7 +623,7 @@ define linkonce_odr dso_local void @_Z5test2IiEvT_iz(i32 noundef %t, i32 noundef
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -643,7 +643,7 @@ define linkonce_odr dso_local void @_Z5test2IiEvT_iz(i32 noundef %t, i32 noundef
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -678,7 +678,7 @@ define linkonce_odr dso_local void @_Z5test2IfEvT_iz(float noundef %t, i32 nound
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -698,7 +698,7 @@ define linkonce_odr dso_local void @_Z5test2IfEvT_iz(float noundef %t, i32 nound
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -733,7 +733,7 @@ define linkonce_odr dso_local void @_Z5test2IdEvT_iz(double noundef %t, i32 noun
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -753,7 +753,7 @@ define linkonce_odr dso_local void @_Z5test2IdEvT_iz(double noundef %t, i32 noun
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -788,7 +788,7 @@ define linkonce_odr dso_local void @_Z5test2IeEvT_iz(x86_fp80 noundef %t, i32 no
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -808,7 +808,7 @@ define linkonce_odr dso_local void @_Z5test2IeEvT_iz(x86_fp80 noundef %t, i32 no
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -843,7 +843,7 @@ define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz(i64 %t.coerce, i32 no
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -863,7 +863,7 @@ define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz(i64 %t.coerce, i32 no
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -898,7 +898,7 @@ define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz(i64 %t.coerce0,
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -918,7 +918,7 @@ define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz(i64 %t.coerce0,
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -953,7 +953,7 @@ define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz(double %t.coer
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -973,7 +973,7 @@ define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz(double %t.coer
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -1008,7 +1008,7 @@ define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz(ptr noundef byval(%s
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1028,7 +1028,7 @@ define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz(ptr noundef byval(%s
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -1063,7 +1063,7 @@ define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz(double %t.coerc
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1083,7 +1083,7 @@ define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz(double %t.coerc
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -1118,7 +1118,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz(ptr noundef byv
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1138,7 +1138,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz(ptr noundef byv
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
@@ -1173,7 +1173,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz(ptr noundef byv
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1193,7 +1193,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz(ptr noundef byv
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT: call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_debug_info.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_debug_info.ll
index 21f3311a57ef..f07f3ad06e60 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/msan_debug_info.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/msan_debug_info.ll
@@ -542,7 +542,7 @@ define void @VAStart(i32 %x, ...) sanitize_memory {
; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[TMP27]], 17592186044416, !dbg [[DBG11]]
; CHECK-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP29]] to ptr, !dbg [[DBG11]]
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 24, i1 false), !dbg [[DBG11]]
-; CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]), !dbg [[DBG11]]
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VA]]), !dbg [[DBG11]]
; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[VA]] to i64, !dbg [[DBG11]]
; CHECK-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], 16, !dbg [[DBG11]]
; CHECK-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr, !dbg [[DBG11]]
diff --git a/llvm/test/MC/AMDGPU/hsa-amdgpu-exprs.s b/llvm/test/MC/AMDGPU/hsa-amdgpu-exprs.s
new file mode 100644
index 000000000000..4623500987be
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-amdgpu-exprs.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// OBJDUMP: 0000 00000000 0f000000 00000000 00000000
+
+.text
+
+.p2align 8
+.type caller,@function
+caller:
+ s_endpgm
+
+.rodata
+
+.p2align 6
+.amdhsa_kernel caller
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_private_segment_fixed_size max(7, callee1.private_seg_size, callee2.private_seg_size)
+.end_amdhsa_kernel
+
+.set callee1.private_seg_size, 4
+.set callee2.private_seg_size, 15
+
+// ASM: .amdhsa_private_segment_fixed_size max(7, callee1.private_seg_size, callee2.private_seg_size)
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s b/llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s
new file mode 100644
index 000000000000..fab3e893352b
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s
@@ -0,0 +1,281 @@
+// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefix=ASM %s
+
+// Some expressions currently require (immediately) solvable expressions, i.e.,
+// they don't depend on yet-unknown symbolic values.
+
+.text
+// ASM: .text
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type user_sgpr_count,@function
+user_sgpr_count:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_count
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_count defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_count
+
+.p2align 8
+.type user_sgpr_private_segment_buffer,@function
+user_sgpr_private_segment_buffer:
+ s_endpgm
+
+.amdhsa_kernel user_sgpr_private_segment_buffer
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_private_segment_buffer defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer
+
+.p2align 8
+.type user_sgpr_kernarg_preload_length,@function
+user_sgpr_kernarg_preload_length:
+ s_endpgm
+
+.amdhsa_kernel user_sgpr_kernarg_preload_length
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_kernarg_preload_length defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length defined_boolean
+
+.p2align 8
+.type user_sgpr_kernarg_preload_offset,@function
+user_sgpr_kernarg_preload_offset:
+ s_endpgm
+
+.amdhsa_kernel user_sgpr_kernarg_preload_offset
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_kernarg_preload_offset defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset defined_boolean
+
+.p2align 8
+.type user_sgpr_dispatch_ptr,@function
+user_sgpr_dispatch_ptr:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_dispatch_ptr
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_dispatch_ptr defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr
+
+.p2align 8
+.type user_sgpr_queue_ptr,@function
+user_sgpr_queue_ptr:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_queue_ptr
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_queue_ptr defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr
+
+.p2align 8
+.type user_sgpr_kernarg_segment_ptr,@function
+user_sgpr_kernarg_segment_ptr:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_kernarg_segment_ptr
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_kernarg_segment_ptr defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr
+
+.p2align 8
+.type user_sgpr_dispatch_id,@function
+user_sgpr_dispatch_id:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_dispatch_id
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_dispatch_id defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id
+
+.p2align 8
+.type user_sgpr_flat_scratch_init,@function
+user_sgpr_flat_scratch_init:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_flat_scratch_init
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_flat_scratch_init defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init
+
+.p2align 8
+.type user_sgpr_private_segment_size,@function
+user_sgpr_private_segment_size:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_private_segment_size
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_private_segment_size defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size
+
+.p2align 8
+.type wavefront_size32,@function
+wavefront_size32:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel wavefront_size32
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_wavefront_size32 defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_wavefront_size32
+
+.p2align 8
+.type next_free_vgpr,@function
+next_free_vgpr:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel next_free_vgpr
+ .amdhsa_next_free_vgpr defined_boolean
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_next_free_vgpr
+
+.p2align 8
+.type next_free_sgpr,@function
+next_free_sgpr:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel next_free_sgpr
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr defined_boolean
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_next_free_sgpr
+
+.p2align 8
+.type accum_offset,@function
+accum_offset:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel accum_offset
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_accum_offset
+
+.p2align 8
+.type reserve_vcc,@function
+reserve_vcc:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel reserve_vcc
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_reserve_vcc defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_reserve_vcc
+
+.p2align 8
+.type reserve_flat_scratch,@function
+reserve_flat_scratch:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel reserve_flat_scratch
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_reserve_flat_scratch defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_reserve_flat_scratch
+
+.p2align 8
+.type shared_vgpr_count,@function
+shared_vgpr_count:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel shared_vgpr_count
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_shared_vgpr_count defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_shared_vgpr_count
+
+.set defined_boolean, 1
+
+// ASM: .set defined_boolean, 1
+// ASM-NEXT: .no_dead_strip defined_boolean
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s
new file mode 100644
index 000000000000..95af59c413ae
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s
@@ -0,0 +1,190 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
+// When going from asm -> obj, the expressions should get resolved (through fixups).
+
+// OBJDUMP: Contents of section .rodata
+// expr_defined_later
+// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 00f0afe4 801f007f 000c0000 00000000
+// expr_defined
+// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 00f0afe4 801f007f 000c0000 00000000
+
+.text
+// ASM: .text
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type expr_defined_later,@function
+expr_defined_later:
+ s_endpgm
+
+.p2align 8
+.type expr_defined,@function
+expr_defined:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel expr_defined_later
+ .amdhsa_group_segment_fixed_size defined_value+2
+ .amdhsa_private_segment_fixed_size defined_value+3
+ .amdhsa_system_vgpr_workitem_id defined_2_bits
+ .amdhsa_float_round_mode_32 defined_2_bits
+ .amdhsa_float_round_mode_16_64 defined_2_bits
+ .amdhsa_float_denorm_mode_32 defined_2_bits
+ .amdhsa_float_denorm_mode_16_64 defined_2_bits
+ .amdhsa_system_sgpr_workgroup_id_x defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_y defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_z defined_boolean
+ .amdhsa_system_sgpr_workgroup_info defined_boolean
+ .amdhsa_fp16_overflow defined_boolean
+ .amdhsa_workgroup_processor_mode defined_boolean
+ .amdhsa_memory_ordered defined_boolean
+ .amdhsa_forward_progress defined_boolean
+ .amdhsa_exception_fp_ieee_invalid_op defined_boolean
+ .amdhsa_exception_fp_denorm_src defined_boolean
+ .amdhsa_exception_fp_ieee_div_zero defined_boolean
+ .amdhsa_exception_fp_ieee_overflow defined_boolean
+ .amdhsa_exception_fp_ieee_underflow defined_boolean
+ .amdhsa_exception_fp_ieee_inexact defined_boolean
+ .amdhsa_exception_int_div_zero defined_boolean
+ .amdhsa_uses_dynamic_stack defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+.set defined_value, 41
+.set defined_2_bits, 3
+.set defined_boolean, 1
+
+.p2align 6
+.amdhsa_kernel expr_defined
+ .amdhsa_group_segment_fixed_size defined_value+1
+ .amdhsa_private_segment_fixed_size defined_value+2
+ .amdhsa_system_vgpr_workitem_id defined_2_bits
+ .amdhsa_float_round_mode_32 defined_2_bits
+ .amdhsa_float_round_mode_16_64 defined_2_bits
+ .amdhsa_float_denorm_mode_32 defined_2_bits
+ .amdhsa_float_denorm_mode_16_64 defined_2_bits
+ .amdhsa_system_sgpr_workgroup_id_x defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_y defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_z defined_boolean
+ .amdhsa_system_sgpr_workgroup_info defined_boolean
+ .amdhsa_fp16_overflow defined_boolean
+ .amdhsa_workgroup_processor_mode defined_boolean
+ .amdhsa_memory_ordered defined_boolean
+ .amdhsa_forward_progress defined_boolean
+ .amdhsa_exception_fp_ieee_invalid_op defined_boolean
+ .amdhsa_exception_fp_denorm_src defined_boolean
+ .amdhsa_exception_fp_ieee_div_zero defined_boolean
+ .amdhsa_exception_fp_ieee_overflow defined_boolean
+ .amdhsa_exception_fp_ieee_underflow defined_boolean
+ .amdhsa_exception_fp_ieee_inexact defined_boolean
+ .amdhsa_exception_int_div_zero defined_boolean
+ .amdhsa_uses_dynamic_stack defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel expr_defined_later
+// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
+// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1)>>0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&32)>>5
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6
+// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&512)>>9
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1024)>>10
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&6144)>>11
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18
+// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21
+// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23
+// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26
+// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&536870912)>>29
+// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&1073741824)>>30
+// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&2147483648)>>31
+// ASM-NEXT: .amdhsa_shared_vgpr_count 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&16777216)>>24
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&33554432)>>25
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&67108864)>>26
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&134217728)>>27
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&268435456)>>28
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&536870912)>>29
+// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1073741824)>>30
+// ASM-NEXT: .end_amdhsa_kernel
+
+// ASM: .set defined_value, 41
+// ASM-NEXT: .no_dead_strip defined_value
+// ASM-NEXT: .set defined_2_bits, 3
+// ASM-NEXT: .no_dead_strip defined_2_bits
+// ASM-NEXT: .set defined_boolean, 1
+// ASM-NEXT: .no_dead_strip defined_boolean
+
+// ASM: .amdhsa_kernel expr_defined
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 42
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 43
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_wavefront_size32 1
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 3
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 3
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 3
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3
+// ASM-NEXT: .amdhsa_dx10_clamp 1
+// ASM-NEXT: .amdhsa_ieee_mode 1
+// ASM-NEXT: .amdhsa_fp16_overflow 1
+// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
+// ASM-NEXT: .amdhsa_memory_ordered 1
+// ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_shared_vgpr_count 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
+// ASM-NEXT: .amdhsa_exception_int_div_zero 1
+// ASM-NEXT: .end_amdhsa_kernel
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
new file mode 100644
index 000000000000..e1107fb69ba4
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
@@ -0,0 +1,186 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
+// When going from asm -> obj, the expressions should get resolved (through fixups).
+
+// OBJDUMP: Contents of section .rodata
+// expr_defined_later
+// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 00f0afe4 811f007f 000c0000 00000000
+// expr_defined
+// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 00f0afe4 811f007f 000c0000 00000000
+
+.text
+// ASM: .text
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type expr_defined_later,@function
+expr_defined_later:
+ s_endpgm
+
+.p2align 8
+.type expr_defined,@function
+expr_defined:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel expr_defined_later
+ .amdhsa_group_segment_fixed_size defined_value+2
+ .amdhsa_private_segment_fixed_size defined_value+3
+ .amdhsa_system_vgpr_workitem_id defined_2_bits
+ .amdhsa_float_round_mode_32 defined_2_bits
+ .amdhsa_float_round_mode_16_64 defined_2_bits
+ .amdhsa_float_denorm_mode_32 defined_2_bits
+ .amdhsa_float_denorm_mode_16_64 defined_2_bits
+ .amdhsa_system_sgpr_workgroup_id_x defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_y defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_z defined_boolean
+ .amdhsa_system_sgpr_workgroup_info defined_boolean
+ .amdhsa_fp16_overflow defined_boolean
+ .amdhsa_workgroup_processor_mode defined_boolean
+ .amdhsa_memory_ordered defined_boolean
+ .amdhsa_forward_progress defined_boolean
+ .amdhsa_exception_fp_ieee_invalid_op defined_boolean
+ .amdhsa_exception_fp_denorm_src defined_boolean
+ .amdhsa_exception_fp_ieee_div_zero defined_boolean
+ .amdhsa_exception_fp_ieee_overflow defined_boolean
+ .amdhsa_exception_fp_ieee_underflow defined_boolean
+ .amdhsa_exception_fp_ieee_inexact defined_boolean
+ .amdhsa_exception_int_div_zero defined_boolean
+ .amdhsa_enable_private_segment defined_boolean
+ .amdhsa_uses_dynamic_stack defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+.set defined_value, 41
+.set defined_2_bits, 3
+.set defined_boolean, 1
+
+.p2align 6
+.amdhsa_kernel expr_defined
+ .amdhsa_group_segment_fixed_size defined_value+1
+ .amdhsa_private_segment_fixed_size defined_value+2
+ .amdhsa_system_vgpr_workitem_id defined_2_bits
+ .amdhsa_float_round_mode_32 defined_2_bits
+ .amdhsa_float_round_mode_16_64 defined_2_bits
+ .amdhsa_float_denorm_mode_32 defined_2_bits
+ .amdhsa_float_denorm_mode_16_64 defined_2_bits
+ .amdhsa_system_sgpr_workgroup_id_x defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_y defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_z defined_boolean
+ .amdhsa_system_sgpr_workgroup_info defined_boolean
+ .amdhsa_fp16_overflow defined_boolean
+ .amdhsa_workgroup_processor_mode defined_boolean
+ .amdhsa_memory_ordered defined_boolean
+ .amdhsa_forward_progress defined_boolean
+ .amdhsa_exception_fp_ieee_invalid_op defined_boolean
+ .amdhsa_exception_fp_denorm_src defined_boolean
+ .amdhsa_exception_fp_ieee_div_zero defined_boolean
+ .amdhsa_exception_fp_ieee_overflow defined_boolean
+ .amdhsa_exception_fp_ieee_underflow defined_boolean
+ .amdhsa_exception_fp_ieee_inexact defined_boolean
+ .amdhsa_exception_int_div_zero defined_boolean
+ .amdhsa_enable_private_segment defined_boolean
+ .amdhsa_uses_dynamic_stack defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel expr_defined_later
+// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
+// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6
+// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10
+// ASM-NEXT: .amdhsa_enable_private_segment (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18
+// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21
+// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23
+// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26
+// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&536870912)>>29
+// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&1073741824)>>30
+// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&2147483648)>>31
+// ASM-NEXT: .amdhsa_shared_vgpr_count 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29
+// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30
+// ASM-NEXT: .end_amdhsa_kernel
+
+// ASM: .set defined_value, 41
+// ASM-NEXT: .no_dead_strip defined_value
+// ASM-NEXT: .set defined_2_bits, 3
+// ASM-NEXT: .no_dead_strip defined_2_bits
+// ASM-NEXT: .set defined_boolean, 1
+// ASM-NEXT: .no_dead_strip defined_boolean
+
+// ASM: .amdhsa_kernel expr_defined
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 42
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 43
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_wavefront_size32 1
+// ASM-NEXT: .amdhsa_enable_private_segment 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_float_round_mode_32 3
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 3
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 3
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3
+// ASM-NEXT: .amdhsa_dx10_clamp 1
+// ASM-NEXT: .amdhsa_ieee_mode 1
+// ASM-NEXT: .amdhsa_fp16_overflow 1
+// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
+// ASM-NEXT: .amdhsa_memory_ordered 1
+// ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_shared_vgpr_count 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
+// ASM-NEXT: .amdhsa_exception_int_div_zero 1
+// ASM-NEXT: .end_amdhsa_kernel
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
new file mode 100644
index 000000000000..449616d35186
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
@@ -0,0 +1,184 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
+// When going from asm -> obj, the expressions should get resolved (through fixups).
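+// For example, in the .rodata dump below the first two words of the
+// expr_defined_later descriptor, 2b000000 2c000000, are defined_value+2 (43)
+// and defined_value+3 (44), already resolved into little-endian constants.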
+
+// OBJDUMP: Contents of section .rodata
+// expr_defined_later
+// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 00f02fe4 811f007f 000c0000 00000000
+// expr_defined
+// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 00f02fe4 811f007f 000c0000 00000000
+
+.text
+// ASM: .text
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type expr_defined_later,@function
+expr_defined_later:
+ s_endpgm
+
+.p2align 8
+.type expr_defined,@function
+expr_defined:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel expr_defined_later
+ .amdhsa_group_segment_fixed_size defined_value+2
+ .amdhsa_private_segment_fixed_size defined_value+3
+ .amdhsa_system_vgpr_workitem_id defined_2_bits
+ .amdhsa_float_round_mode_32 defined_2_bits
+ .amdhsa_float_round_mode_16_64 defined_2_bits
+ .amdhsa_float_denorm_mode_32 defined_2_bits
+ .amdhsa_float_denorm_mode_16_64 defined_2_bits
+ .amdhsa_system_sgpr_workgroup_id_x defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_y defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_z defined_boolean
+ .amdhsa_system_sgpr_workgroup_info defined_boolean
+ .amdhsa_fp16_overflow defined_boolean
+ .amdhsa_workgroup_processor_mode defined_boolean
+ .amdhsa_memory_ordered defined_boolean
+ .amdhsa_forward_progress defined_boolean
+ .amdhsa_exception_fp_ieee_invalid_op defined_boolean
+ .amdhsa_exception_fp_denorm_src defined_boolean
+ .amdhsa_exception_fp_ieee_div_zero defined_boolean
+ .amdhsa_exception_fp_ieee_overflow defined_boolean
+ .amdhsa_exception_fp_ieee_underflow defined_boolean
+ .amdhsa_exception_fp_ieee_inexact defined_boolean
+ .amdhsa_exception_int_div_zero defined_boolean
+ .amdhsa_round_robin_scheduling defined_boolean
+ .amdhsa_enable_private_segment defined_boolean
+ .amdhsa_uses_dynamic_stack defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+.set defined_value, 41
+.set defined_2_bits, 3
+.set defined_boolean, 1
+
+.p2align 6
+.amdhsa_kernel expr_defined
+ .amdhsa_group_segment_fixed_size defined_value+1
+ .amdhsa_private_segment_fixed_size defined_value+2
+ .amdhsa_system_vgpr_workitem_id defined_2_bits
+ .amdhsa_float_round_mode_32 defined_2_bits
+ .amdhsa_float_round_mode_16_64 defined_2_bits
+ .amdhsa_float_denorm_mode_32 defined_2_bits
+ .amdhsa_float_denorm_mode_16_64 defined_2_bits
+ .amdhsa_system_sgpr_workgroup_id_x defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_y defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_z defined_boolean
+ .amdhsa_system_sgpr_workgroup_info defined_boolean
+ .amdhsa_fp16_overflow defined_boolean
+ .amdhsa_workgroup_processor_mode defined_boolean
+ .amdhsa_memory_ordered defined_boolean
+ .amdhsa_forward_progress defined_boolean
+ .amdhsa_exception_fp_ieee_invalid_op defined_boolean
+ .amdhsa_exception_fp_denorm_src defined_boolean
+ .amdhsa_exception_fp_ieee_div_zero defined_boolean
+ .amdhsa_exception_fp_ieee_overflow defined_boolean
+ .amdhsa_exception_fp_ieee_underflow defined_boolean
+ .amdhsa_exception_fp_ieee_inexact defined_boolean
+ .amdhsa_exception_int_div_zero defined_boolean
+ .amdhsa_round_robin_scheduling defined_boolean
+ .amdhsa_enable_private_segment defined_boolean
+ .amdhsa_uses_dynamic_stack defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel expr_defined_later
+// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
+// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6
+// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10
+// ASM-NEXT: .amdhsa_enable_private_segment (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18
+// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26
+// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&536870912)>>29
+// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&1073741824)>>30
+// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&2147483648)>>31
+// ASM-NEXT: .amdhsa_round_robin_scheduling (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29
+// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30
+// ASM-NEXT: .end_amdhsa_kernel
+
+// ASM: .set defined_value, 41
+// ASM-NEXT: .no_dead_strip defined_value
+// ASM-NEXT: .set defined_2_bits, 3
+// ASM-NEXT: .no_dead_strip defined_2_bits
+// ASM-NEXT: .set defined_boolean, 1
+// ASM-NEXT: .no_dead_strip defined_boolean
+
+// ASM: .amdhsa_kernel expr_defined
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 42
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 43
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_wavefront_size32 1
+// ASM-NEXT: .amdhsa_enable_private_segment 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_float_round_mode_32 3
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 3
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 3
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3
+// ASM-NEXT: .amdhsa_fp16_overflow 1
+// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
+// ASM-NEXT: .amdhsa_memory_ordered 1
+// ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_round_robin_scheduling 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
+// ASM-NEXT: .amdhsa_exception_int_div_zero 1
+// ASM-NEXT: .end_amdhsa_kernel
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s
new file mode 100644
index 000000000000..c7e05441b45f
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s
@@ -0,0 +1,168 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
+// When going from asm -> obj, the expressions should get resolved (through fixups).
+
+// OBJDUMP: Contents of section .rodata
+// expr_defined_later
+// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 00f0af00 801f007f 00080000 00000000
+// expr_defined
+// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 00f0af00 801f007f 00080000 00000000
+
+.text
+// ASM: .text
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type expr_defined_later,@function
+expr_defined_later:
+ s_endpgm
+
+.p2align 8
+.type expr_defined,@function
+expr_defined:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel expr_defined_later
+ .amdhsa_group_segment_fixed_size defined_value+2
+ .amdhsa_private_segment_fixed_size defined_value+3
+ .amdhsa_system_vgpr_workitem_id defined_2_bits
+ .amdhsa_float_round_mode_32 defined_2_bits
+ .amdhsa_float_round_mode_16_64 defined_2_bits
+ .amdhsa_float_denorm_mode_32 defined_2_bits
+ .amdhsa_float_denorm_mode_16_64 defined_2_bits
+ .amdhsa_system_sgpr_workgroup_id_x defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_y defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_z defined_boolean
+ .amdhsa_system_sgpr_workgroup_info defined_boolean
+ .amdhsa_exception_fp_ieee_invalid_op defined_boolean
+ .amdhsa_exception_fp_denorm_src defined_boolean
+ .amdhsa_exception_fp_ieee_div_zero defined_boolean
+ .amdhsa_exception_fp_ieee_overflow defined_boolean
+ .amdhsa_exception_fp_ieee_underflow defined_boolean
+ .amdhsa_exception_fp_ieee_inexact defined_boolean
+ .amdhsa_exception_int_div_zero defined_boolean
+ .amdhsa_uses_dynamic_stack defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+.set defined_value, 41
+.set defined_2_bits, 3
+.set defined_boolean, 1
+
+.p2align 6
+.amdhsa_kernel expr_defined
+ .amdhsa_group_segment_fixed_size defined_value+1
+ .amdhsa_private_segment_fixed_size defined_value+2
+ .amdhsa_system_vgpr_workitem_id defined_2_bits
+ .amdhsa_float_round_mode_32 defined_2_bits
+ .amdhsa_float_round_mode_16_64 defined_2_bits
+ .amdhsa_float_denorm_mode_32 defined_2_bits
+ .amdhsa_float_denorm_mode_16_64 defined_2_bits
+ .amdhsa_system_sgpr_workgroup_id_x defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_y defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_z defined_boolean
+ .amdhsa_system_sgpr_workgroup_info defined_boolean
+ .amdhsa_exception_fp_ieee_invalid_op defined_boolean
+ .amdhsa_exception_fp_denorm_src defined_boolean
+ .amdhsa_exception_fp_ieee_div_zero defined_boolean
+ .amdhsa_exception_fp_ieee_overflow defined_boolean
+ .amdhsa_exception_fp_ieee_underflow defined_boolean
+ .amdhsa_exception_fp_ieee_inexact defined_boolean
+ .amdhsa_exception_int_div_zero defined_boolean
+ .amdhsa_uses_dynamic_stack defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel expr_defined_later
+// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
+// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((0&(~2048))|(defined_boolean<<11))&1)>>0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((0&(~2048))|(defined_boolean<<11))&2)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((0&(~2048))|(defined_boolean<<11))&4)>>2
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((0&(~2048))|(defined_boolean<<11))&8)>>3
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((0&(~2048))|(defined_boolean<<11))&16)>>4
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((0&(~2048))|(defined_boolean<<11))&32)>>5
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((0&(~2048))|(defined_boolean<<11))&64)>>6
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&512)>>9
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1024)>>10
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&6144)>>11
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18
+// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21
+// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&16777216)>>24
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&33554432)>>25
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&67108864)>>26
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&134217728)>>27
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&268435456)>>28
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&536870912)>>29
+// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1073741824)>>30
+// ASM-NEXT: .end_amdhsa_kernel
+
+// ASM: .set defined_value, 41
+// ASM-NEXT: .no_dead_strip defined_value
+// ASM-NEXT: .set defined_2_bits, 3
+// ASM-NEXT: .no_dead_strip defined_2_bits
+// ASM-NEXT: .set defined_boolean, 1
+// ASM-NEXT: .no_dead_strip defined_boolean
+
+// ASM: .amdhsa_kernel expr_defined
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 42
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 43
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_float_round_mode_32 3
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 3
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 3
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3
+// ASM-NEXT: .amdhsa_dx10_clamp 1
+// ASM-NEXT: .amdhsa_ieee_mode 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
+// ASM-NEXT: .amdhsa_exception_int_div_zero 1
+// ASM-NEXT: .end_amdhsa_kernel
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s
new file mode 100644
index 000000000000..49a5015987a6
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s
@@ -0,0 +1,171 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck --check-prefix=ASM %s
+
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx801 -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
+// When going from asm -> obj, the expressions should get resolved (through fixups).
+
+// OBJDUMP: Contents of section .rodata
+// expr_defined_later
+// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 00f0af00 801f007f 00080000 00000000
+// expr_defined
+// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 00f0af00 801f007f 00080000 00000000
+
+.text
+// ASM: .text
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type expr_defined_later,@function
+expr_defined_later:
+ s_endpgm
+
+.p2align 8
+.type expr_defined,@function
+expr_defined:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel expr_defined_later
+ .amdhsa_group_segment_fixed_size defined_value+2
+ .amdhsa_private_segment_fixed_size defined_value+3
+ .amdhsa_system_vgpr_workitem_id defined_2_bits
+ .amdhsa_float_round_mode_32 defined_2_bits
+ .amdhsa_float_round_mode_16_64 defined_2_bits
+ .amdhsa_float_denorm_mode_32 defined_2_bits
+ .amdhsa_float_denorm_mode_16_64 defined_2_bits
+ .amdhsa_system_sgpr_workgroup_id_x defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_y defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_z defined_boolean
+ .amdhsa_system_sgpr_workgroup_info defined_boolean
+ .amdhsa_exception_fp_ieee_invalid_op defined_boolean
+ .amdhsa_exception_fp_denorm_src defined_boolean
+ .amdhsa_exception_fp_ieee_div_zero defined_boolean
+ .amdhsa_exception_fp_ieee_overflow defined_boolean
+ .amdhsa_exception_fp_ieee_underflow defined_boolean
+ .amdhsa_exception_fp_ieee_inexact defined_boolean
+ .amdhsa_exception_int_div_zero defined_boolean
+ .amdhsa_uses_dynamic_stack defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+.set defined_value, 41
+.set defined_2_bits, 3
+.set defined_boolean, 1
+
+.p2align 6
+.amdhsa_kernel expr_defined
+ .amdhsa_group_segment_fixed_size defined_value+1
+ .amdhsa_private_segment_fixed_size defined_value+2
+ .amdhsa_system_vgpr_workitem_id defined_2_bits
+ .amdhsa_float_round_mode_32 defined_2_bits
+ .amdhsa_float_round_mode_16_64 defined_2_bits
+ .amdhsa_float_denorm_mode_32 defined_2_bits
+ .amdhsa_float_denorm_mode_16_64 defined_2_bits
+ .amdhsa_system_sgpr_workgroup_id_x defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_y defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_z defined_boolean
+ .amdhsa_system_sgpr_workgroup_info defined_boolean
+ .amdhsa_exception_fp_ieee_invalid_op defined_boolean
+ .amdhsa_exception_fp_denorm_src defined_boolean
+ .amdhsa_exception_fp_ieee_div_zero defined_boolean
+ .amdhsa_exception_fp_ieee_overflow defined_boolean
+ .amdhsa_exception_fp_ieee_underflow defined_boolean
+ .amdhsa_exception_fp_ieee_inexact defined_boolean
+ .amdhsa_exception_int_div_zero defined_boolean
+ .amdhsa_uses_dynamic_stack defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel expr_defined_later
+// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
+// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((0&(~2048))|(defined_boolean<<11))&1)>>0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((0&(~2048))|(defined_boolean<<11))&2)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((0&(~2048))|(defined_boolean<<11))&4)>>2
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((0&(~2048))|(defined_boolean<<11))&8)>>3
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((0&(~2048))|(defined_boolean<<11))&16)>>4
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((0&(~2048))|(defined_boolean<<11))&32)>>5
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((0&(~2048))|(defined_boolean<<11))&64)>>6
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&512)>>9
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1024)>>10
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&6144)>>11
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18
+// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21
+// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&16777216)>>24
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&33554432)>>25
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&67108864)>>26
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&134217728)>>27
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&268435456)>>28
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&536870912)>>29
+// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1073741824)>>30
+// ASM-NEXT: .end_amdhsa_kernel
+
+// ASM: .set defined_value, 41
+// ASM-NEXT: .no_dead_strip defined_value
+// ASM-NEXT: .set defined_2_bits, 3
+// ASM-NEXT: .no_dead_strip defined_2_bits
+// ASM-NEXT: .set defined_boolean, 1
+// ASM-NEXT: .no_dead_strip defined_boolean
+
+// ASM: .amdhsa_kernel expr_defined
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 42
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 43
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 3
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 3
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 3
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3
+// ASM-NEXT: .amdhsa_dx10_clamp 1
+// ASM-NEXT: .amdhsa_ieee_mode 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
+// ASM-NEXT: .amdhsa_exception_int_div_zero 1
+// ASM-NEXT: .end_amdhsa_kernel
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
new file mode 100644
index 000000000000..b7f89239160f
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
@@ -0,0 +1,148 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
+// When going from asm -> obj, the expressions should get resolved (through fixups).
+
+// OBJDUMP: Contents of section .rodata
+// expr_defined_later
+// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000100
+// OBJDUMP-NEXT: 0030 0000ac04 81000000 00000000 00000000
+// expr_defined
+// OBJDUMP-NEXT: 0040 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100
+// OBJDUMP-NEXT: 0070 0000ac04 81000000 00000000 00000000
+
+.text
+// ASM: .text
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type expr_defined_later,@function
+expr_defined_later:
+ s_endpgm
+
+.p2align 8
+.type expr_defined,@function
+expr_defined:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel expr_defined_later
+ .amdhsa_system_sgpr_private_segment_wavefront_offset defined_boolean
+ .amdhsa_dx10_clamp defined_boolean
+ .amdhsa_ieee_mode defined_boolean
+ .amdhsa_fp16_overflow defined_boolean
+ .amdhsa_tg_split defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+.set defined_boolean, 1
+
+.p2align 6
+.amdhsa_kernel expr_defined
+ .amdhsa_system_sgpr_private_segment_wavefront_offset defined_boolean
+ .amdhsa_dx10_clamp defined_boolean
+ .amdhsa_ieee_mode defined_boolean
+ .amdhsa_fp16_overflow defined_boolean
+ .amdhsa_tg_split defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel expr_defined_later
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_accum_offset (((((((0&(~65536))|(defined_boolean<<16))&(~63))|(0<<0))&63)>>0)+1)*4
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18
+// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21
+// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23
+// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26
+// ASM-NEXT: .amdhsa_tg_split (((((0&(~65536))|(defined_boolean<<16))&(~63))|(0<<0))&65536)>>16
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29
+// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30
+// ASM-NEXT: .end_amdhsa_kernel
+
+// ASM: .set defined_boolean, 1
+// ASM-NEXT: .no_dead_strip defined_boolean
+
+// ASM: .amdhsa_kernel expr_defined
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 0
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_accum_offset 4
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 0
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 0
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 0
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3
+// ASM-NEXT: .amdhsa_dx10_clamp 1
+// ASM-NEXT: .amdhsa_ieee_mode 1
+// ASM-NEXT: .amdhsa_fp16_overflow 1
+// ASM-NEXT: .amdhsa_tg_split 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+// ASM-NEXT: .amdhsa_exception_int_div_zero 0
+// ASM-NEXT: .end_amdhsa_kernel
diff --git a/llvm/test/MC/AMDGPU/hsa-tg-split.s b/llvm/test/MC/AMDGPU/hsa-tg-split.s
new file mode 100644
index 000000000000..5a4d3e2c279c
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-tg-split.s
@@ -0,0 +1,74 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// OBJDUMP: Contents of section .rodata
+// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000100
+// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
+
+.text
+// ASM: .text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type minimal,@function
+minimal:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel minimal
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel minimal
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 0
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_accum_offset 4
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 0
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 0
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 0
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3
+// ASM-NEXT: .amdhsa_dx10_clamp 1
+// ASM-NEXT: .amdhsa_ieee_mode 1
+// ASM-NEXT: .amdhsa_fp16_overflow 0
+// ASM-NEXT: .amdhsa_tg_split 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+// ASM-NEXT: .amdhsa_exception_int_div_zero 0
+// ASM-NEXT: .end_amdhsa_kernel
diff --git a/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll b/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll
index 188210782edd..4c5a448d12c4 100644
--- a/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll
+++ b/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll
@@ -23,7 +23,7 @@ define internal i32 @i(ptr inalloca(ptr) %a, ...) {
; CHECK-LABEL: define {{[^@]+}}@i
; CHECK-SAME: (ptr inalloca(ptr) [[A:%.*]], ...) unnamed_addr {
; CHECK-NEXT: [[AP:%.*]] = alloca ptr, align 4
-; CHECK-NEXT: call void @llvm.va_start(ptr [[AP]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[AP]])
; CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[AP]], align 4
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
; CHECK-NEXT: ret i32 [[L]]
diff --git a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
index ef365d6eaddb..38dfd25e039e 100644
--- a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
@@ -17,10 +17,10 @@ define i32 @func1(i32 %a, double %b, ptr %v, ...) nounwind {
; CHECK-NEXT: [[AP:%.*]] = alloca ptr, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], ptr [[A_ADDR]], double [[B:%.*]], ptr [[B_ADDR]])
-; CHECK-NEXT: call void @llvm.va_start(ptr [[AP]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[AP]])
; CHECK-NEXT: [[TMP0:%.*]] = va_arg ptr [[AP]], i32
-; CHECK-NEXT: call void @llvm.va_copy(ptr [[V:%.*]], ptr [[AP]])
-; CHECK-NEXT: call void @llvm.va_end(ptr [[AP]])
+; CHECK-NEXT: call void @llvm.va_copy.p0(ptr [[V:%.*]], ptr [[AP]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[AP]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[TMP_LOC]])
; CHECK-NEXT: call void @outlined_ir_func_1(i32 [[TMP0]], ptr [[C]], ptr [[TMP_LOC]])
; CHECK-NEXT: [[TMP_RELOAD:%.*]] = load i32, ptr [[TMP_LOC]], align 4
@@ -52,10 +52,10 @@ define i32 @func2(i32 %a, double %b, ptr %v, ...) nounwind {
; CHECK-NEXT: [[AP:%.*]] = alloca ptr, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], ptr [[A_ADDR]], double [[B:%.*]], ptr [[B_ADDR]])
-; CHECK-NEXT: call void @llvm.va_start(ptr [[AP]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[AP]])
; CHECK-NEXT: [[TMP0:%.*]] = va_arg ptr [[AP]], i32
-; CHECK-NEXT: call void @llvm.va_copy(ptr [[V:%.*]], ptr [[AP]])
-; CHECK-NEXT: call void @llvm.va_end(ptr [[AP]])
+; CHECK-NEXT: call void @llvm.va_copy.p0(ptr [[V:%.*]], ptr [[AP]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[AP]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[TMP_LOC]])
; CHECK-NEXT: call void @outlined_ir_func_1(i32 [[TMP0]], ptr [[C]], ptr [[TMP_LOC]])
; CHECK-NEXT: [[TMP_RELOAD:%.*]] = load i32, ptr [[TMP_LOC]], align 4
diff --git a/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll
index 9f565de96057..2d526086fae4 100644
--- a/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll
+++ b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll
@@ -51,7 +51,7 @@ entry:
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8
-; CHECK-NEXT: call void @llvm.va_start(ptr [[AP]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[AP]])
; CHECK-NEXT: [[TMP0:%.*]] = va_arg ptr [[AP]], i32
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[TMP_LOC]])
; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[V]], ptr [[AP]], i32 [[TMP0]], ptr [[C]], ptr [[TMP_LOC]])
@@ -70,7 +70,7 @@ entry:
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
; CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8
-; CHECK-NEXT: call void @llvm.va_start(ptr [[AP]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[AP]])
; CHECK-NEXT: [[TMP0:%.*]] = va_arg ptr [[AP]], i32
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[TMP_LOC]])
; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[V]], ptr [[AP]], i32 [[TMP0]], ptr [[C]], ptr [[TMP_LOC]])
@@ -84,8 +84,8 @@ entry:
; CHECK-NEXT: newFuncRoot:
; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]]
; CHECK: entry_to_outline:
-; CHECK-NEXT: call void @llvm.va_copy(ptr [[TMP0]], ptr [[TMP1]])
-; CHECK-NEXT: call void @llvm.va_end(ptr [[TMP1]])
+; CHECK-NEXT: call void @llvm.va_copy.p0(ptr [[TMP0]], ptr [[TMP1]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[TMP1]])
; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
index 50b0e7a0f547..2f264a2432fc 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
@@ -141,4 +141,4 @@ attributes #1 = { argmemonly nounwind }
!5 = distinct !{!5, !"some domain"}
!6 = !{!7}
!7 = distinct !{!7, !5, !"some scope 2"}
-!8 = !{i64 0, i64 8, null}
+!8 = !{i64 0, i64 8, !0}
diff --git a/llvm/test/Transforms/InstCombine/powi.ll b/llvm/test/Transforms/InstCombine/powi.ll
index 43e34c889106..6c0575e8b719 100644
--- a/llvm/test/Transforms/InstCombine/powi.ll
+++ b/llvm/test/Transforms/InstCombine/powi.ll
@@ -313,7 +313,7 @@ define double @fdiv_pow_powi(double %x) {
; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc nnan double [[X:%.*]], [[X]]
; CHECK-NEXT: ret double [[DIV]]
;
- %p1 = call double @llvm.powi.f64.i32(double %x, i32 3)
+ %p1 = call reassoc double @llvm.powi.f64.i32(double %x, i32 3)
%div = fdiv reassoc nnan double %p1, %x
ret double %div
}
@@ -323,7 +323,7 @@ define float @fdiv_powf_powi(float %x) {
; CHECK-NEXT: [[DIV:%.*]] = call reassoc nnan float @llvm.powi.f32.i32(float [[X:%.*]], i32 99)
; CHECK-NEXT: ret float [[DIV]]
;
- %p1 = call float @llvm.powi.f32.i32(float %x, i32 100)
+ %p1 = call reassoc float @llvm.powi.f32.i32(float %x, i32 100)
%div = fdiv reassoc nnan float %p1, %x
ret float %div
}
@@ -347,10 +347,21 @@ define double @fdiv_pow_powi_multi_use(double %x) {
define float @fdiv_powf_powi_missing_reassoc(float %x) {
; CHECK-LABEL: @fdiv_powf_powi_missing_reassoc(
; CHECK-NEXT: [[P1:%.*]] = call float @llvm.powi.f32.i32(float [[X:%.*]], i32 100)
-; CHECK-NEXT: [[DIV:%.*]] = fdiv nnan float [[P1]], [[X]]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc nnan float [[P1]], [[X]]
; CHECK-NEXT: ret float [[DIV]]
;
%p1 = call float @llvm.powi.f32.i32(float %x, i32 100)
+ %div = fdiv reassoc nnan float %p1, %x
+ ret float %div
+}
+
+define float @fdiv_powf_powi_missing_reassoc1(float %x) {
+; CHECK-LABEL: @fdiv_powf_powi_missing_reassoc1(
+; CHECK-NEXT: [[P1:%.*]] = call reassoc float @llvm.powi.f32.i32(float [[X:%.*]], i32 100)
+; CHECK-NEXT: [[DIV:%.*]] = fdiv nnan float [[P1]], [[X]]
+; CHECK-NEXT: ret float [[DIV]]
+;
+ %p1 = call reassoc float @llvm.powi.f32.i32(float %x, i32 100)
%div = fdiv nnan float %p1, %x
ret float %div
}
diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
index 996d2c0e67e1..d079c03f1dcb 100644
--- a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
+++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -75,7 +75,7 @@ entry:
!1 = !{!"omnipotent char", !0}
!2 = !{!5, !5, i64 0}
!3 = !{i64 0, i64 4, !2}
-!4 = !{i64 0, i64 8, null}
+!4 = !{i64 0, i64 8, !2}
!5 = !{!"float", !0}
!6 = !{i64 0, i64 4, !2, i64 4, i64 4, !2}
!7 = !{i64 0, i64 2, !2, i64 4, i64 6, !2}
diff --git a/llvm/test/Transforms/NewGVN/pr31483.ll b/llvm/test/Transforms/NewGVN/pr31483.ll
index 0e7461c2612b..82e9a2ab286e 100644
--- a/llvm/test/Transforms/NewGVN/pr31483.ll
+++ b/llvm/test/Transforms/NewGVN/pr31483.ll
@@ -41,7 +41,7 @@ define signext i32 @ham(ptr %arg, ptr %arg1) #0 {
; CHECK: bb22:
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb23:
-; CHECK-NEXT: call void @llvm.va_end(ptr [[TMP]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[TMP]])
; CHECK-NEXT: ret i32 undef
;
bb:
diff --git a/llvm/test/Transforms/Reassociate/vaarg_movable.ll b/llvm/test/Transforms/Reassociate/vaarg_movable.ll
index 337877a54a90..4e45b219fccd 100644
--- a/llvm/test/Transforms/Reassociate/vaarg_movable.ll
+++ b/llvm/test/Transforms/Reassociate/vaarg_movable.ll
@@ -10,13 +10,13 @@ define i32 @func(i32 %dummy, ...) {
;
; CHECK-LABEL: @func(
; CHECK-NEXT: [[VARARGS:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: call void @llvm.va_start(ptr [[VARARGS]])
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VARARGS]])
; CHECK-NEXT: [[V0:%.*]] = va_arg ptr [[VARARGS]], i32
; CHECK-NEXT: [[V1:%.*]] = va_arg ptr [[VARARGS]], i32
; CHECK-NEXT: [[V0_NEG:%.*]] = sub i32 0, [[V0]]
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[V0_NEG]], 1
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[V1]]
-; CHECK-NEXT: call void @llvm.va_end(ptr [[VARARGS]])
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VARARGS]])
; CHECK-NEXT: ret i32 [[ADD]]
;
%varargs = alloca ptr, align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll
new file mode 100644
index 000000000000..f376ca71c776
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @test(ptr %b, ptr %c, i32 %0, ptr %a, i1 %tobool3.not) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ptr [[B:%.*]], ptr [[C:%.*]], i32 [[TMP0:%.*]], ptr [[A:%.*]], i1 [[TOBOOL3_NOT:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[TOBOOL3_NOT]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i16>
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i1> [[TMP8]] to <4 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL3_NOT]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i1> [[TMP10]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP7]], <4 x i32> [[TMP9]]
+; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i32> [[TMP12]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: [[TMP14:%.*]] = ashr <4 x i32> [[TMP13]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16>
+; CHECK-NEXT: br i1 true, label [[BB3]], label [[BB2]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i16> [ [[TMP5]], [[BB1]] ], [ [[TMP15]], [[BB2]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i16> [[TMP16]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = sext i16 [[TMP17]] to i32
+; CHECK-NEXT: store i32 [[TMP18]], ptr [[B]], align 16
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i16> [[TMP16]], i32 1
+; CHECK-NEXT: [[TMP20:%.*]] = sext i16 [[TMP19]] to i32
+; CHECK-NEXT: store i32 [[TMP20]], ptr [[A]], align 8
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i16> [[TMP16]], i32 2
+; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
+; CHECK-NEXT: store i32 [[TMP22]], ptr [[C]], align 16
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i16> [[TMP16]], i32 3
+; CHECK-NEXT: [[TMP24:%.*]] = sext i16 [[TMP23]] to i32
+; CHECK-NEXT: store i32 [[TMP24]], ptr [[B]], align 8
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br i1 %tobool3.not, label %bb1, label %bb2
+
+bb1:
+ %conv1.i.us = ashr i32 %0, 16
+ %cmp2.i.us = icmp slt i32 %conv1.i.us, %0
+ %sext26.us = zext i1 %cmp2.i.us to i32
+ %conv1.i.us.5 = ashr i32 %0, 16
+ %cmp2.i.us.5 = icmp slt i32 %conv1.i.us.5, %0
+ %sext26.us.5 = zext i1 %cmp2.i.us.5 to i32
+ %conv1.i.us.6 = ashr i32 %0, 16
+ %cmp2.i.us.6 = icmp slt i32 %conv1.i.us.6, %0
+ %sext26.us.6 = zext i1 %cmp2.i.us.6 to i32
+ %conv1.i.us.7 = ashr i32 %0, 16
+ %cmp2.i.us.7 = icmp slt i32 %conv1.i.us.7, %0
+ %sext26.us.7 = zext i1 %cmp2.i.us.7 to i32
+ br label %bb3
+
+bb2:
+ %cmp2.i = icmp sgt i32 %0, 0
+ %1 = zext i1 %cmp2.i to i32
+ %cond.i = select i1 %tobool3.not, i32 %0, i32 %1
+ %sext26 = shl i32 %cond.i, 16
+ %conv13 = ashr i32 %sext26, 16
+ %cmp2.i.5 = icmp sgt i32 %0, 0
+ %2 = zext i1 %cmp2.i.5 to i32
+ %cond.i.5 = select i1 %tobool3.not, i32 %0, i32 %2
+ %sext26.5 = shl i32 %cond.i.5, 16
+ %conv13.5 = ashr i32 %sext26.5, 16
+ %cmp2.i.6 = icmp sgt i32 %0, 0
+ %3 = zext i1 %cmp2.i.6 to i32
+ %cond.i.6 = select i1 %tobool3.not, i32 %0, i32 %3
+ %sext26.6 = shl i32 %cond.i.6, 16
+ %conv13.6 = ashr i32 %sext26.6, 16
+ %cmp2.i.7 = icmp sgt i32 %0, 0
+ %4 = zext i1 %cmp2.i.7 to i32
+ %cond.i.7 = select i1 %tobool3.not, i32 %0, i32 %4
+ %sext26.7 = shl i32 %cond.i.7, 16
+ %conv13.7 = ashr i32 %sext26.7, 16
+ br i1 true, label %bb3, label %bb2
+
+bb3:
+ %conv13p = phi i32 [ %sext26.us, %bb1 ], [ %conv13, %bb2 ]
+ %conv13.5p = phi i32 [ %sext26.us.5, %bb1 ], [ %conv13.5, %bb2 ]
+ %conv13.6p = phi i32 [ %sext26.us.6, %bb1 ], [ %conv13.6, %bb2 ]
+ %conv13.7p = phi i32 [ %sext26.us.7, %bb1 ], [ %conv13.7, %bb2 ]
+ store i32 %conv13p, ptr %b, align 16
+ store i32 %conv13.5p, ptr %a, align 8
+ store i32 %conv13.6p, ptr %c, align 16
+ store i32 %conv13.7p, ptr %b, align 8
+ ret i32 0
+}
diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll
index 0fcd787fef97..61034de81e4b 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct3.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll
@@ -539,7 +539,7 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias
!6 = !{!5, !5, i64 0}
!7 = !{i64 0, i64 8, !6, i64 8, i64 4, !1}
!8 = !{i64 0, i64 4, !1, i64 4, i64 8, !6}
-!9 = !{i64 0, i64 8, !6, i64 4, i64 8, !1}
+!9 = !{i64 0, i64 8, !6, i64 8, i64 8, !1}
!10 = !{i64 0, i64 2, !1, i64 2, i64 2, !1}
!11 = !{i64 0, i64 1, !1, i64 1, i64 3, !1}
!12 = !{i64 0, i64 2, !1, i64 2, i64 6, !1}
diff --git a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll
index bbcdcb6f5867..73ae66dd76c6 100644
--- a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll
+++ b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll
@@ -836,5 +836,6 @@ define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
!2 = !{ !"set2", !0 }
!3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} }
!4 = !{ float 4.0 }
-!5 = !{ i64 0, i64 8, null }
+!5 = !{ i64 0, i64 8, !6 }
+!6 = !{ !1, !1, i64 0 }
!13 = distinct !{}
diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll
index db7c5f535f7e..87a70ccd3fc7 100644
--- a/llvm/test/Transforms/Scalarizer/basic.ll
+++ b/llvm/test/Transforms/Scalarizer/basic.ll
@@ -870,5 +870,6 @@ define <2 x float> @f25(<2 x float> %src) {
!2 = !{ !"set2", !0 }
!3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} }
!4 = !{ float 4.0 }
-!5 = !{ i64 0, i64 8, null }
+!5 = !{ i64 0, i64 8, !6 }
+!6 = !{ !1, !1, i64 0 }
!13 = distinct !{}
diff --git a/llvm/test/Verifier/tbaa-struct.ll b/llvm/test/Verifier/tbaa-struct.ll
index b8ddc7cee496..14c19a19d5ae 100644
--- a/llvm/test/Verifier/tbaa-struct.ll
+++ b/llvm/test/Verifier/tbaa-struct.ll
@@ -1,28 +1,36 @@
-; RUN: llvm-as < %s 2>&1
-
-; FIXME: The verifer should reject the invalid !tbaa.struct nodes below.
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
define void @test_overlapping_regions(ptr %a1) {
+; CHECK: Overlapping tbaa.struct regions
+; CHECK-NEXT: %ld = load i8, ptr %a1, align 1, !tbaa.struct !0
%ld = load i8, ptr %a1, align 1, !tbaa.struct !0
ret void
}
define void @test_size_not_integer(ptr %a1) {
+; CHECK: Size must be a constant integer
+; CHECK-NEXT: store i8 1, ptr %a1, align 1, !tbaa.struct !5
store i8 1, ptr %a1, align 1, !tbaa.struct !5
ret void
}
define void @test_offset_not_integer(ptr %a1, ptr %a2) {
+; CHECK: Offset must be a constant integer
+; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !6
tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !6
ret void
}
define void @test_tbaa_missing(ptr %a1, ptr %a2) {
+; CHECK: TBAA tag missing
+; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !7
tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !7
ret void
}
define void @test_tbaa_invalid(ptr %a1) {
+; CHECK: Old-style TBAA is no longer allowed, use struct-path TBAA instead
+; CHECK-NEXT: store i8 1, ptr %a1, align 1, !tbaa.struct !8
store i8 1, ptr %a1, align 1, !tbaa.struct !8
ret void
}
diff --git a/llvm/test/tools/llvm-lib/arm64ec-implib.test b/llvm/test/tools/llvm-lib/arm64ec-implib.test
index 9ce53fe0fea0..e9987d0ca2e6 100644
--- a/llvm/test/tools/llvm-lib/arm64ec-implib.test
+++ b/llvm/test/tools/llvm-lib/arm64ec-implib.test
@@ -14,6 +14,8 @@ ARMAP-NEXT: Archive EC map
ARMAP-NEXT: #expname in test.dll
ARMAP-NEXT: #funcexp in test.dll
ARMAP-NEXT: #mangledfunc in test.dll
+ARMAP-NEXT: #manglednonamefunc in test.dll
+ARMAP-NEXT: #nonamefunc in test.dll
ARMAP-NEXT: ?test_cpp_func@@$$hYAHPEAX@Z in test.dll
ARMAP-NEXT: ?test_cpp_func@@YAHPEAX@Z in test.dll
ARMAP-NEXT: __IMPORT_DESCRIPTOR_test in test.dll
@@ -23,13 +25,19 @@ ARMAP-NEXT: __imp_aux_?test_cpp_func@@YAHPEAX@Z in test.dll
ARMAP-NEXT: __imp_aux_expname in test.dll
ARMAP-NEXT: __imp_aux_funcexp in test.dll
ARMAP-NEXT: __imp_aux_mangledfunc in test.dll
+ARMAP-NEXT: __imp_aux_manglednonamefunc in test.dll
+ARMAP-NEXT: __imp_aux_nonamefunc in test.dll
ARMAP-NEXT: __imp_dataexp in test.dll
ARMAP-NEXT: __imp_expname in test.dll
ARMAP-NEXT: __imp_funcexp in test.dll
ARMAP-NEXT: __imp_mangledfunc in test.dll
+ARMAP-NEXT: __imp_manglednonamefunc in test.dll
+ARMAP-NEXT: __imp_nonamefunc in test.dll
ARMAP-NEXT: expname in test.dll
ARMAP-NEXT: funcexp in test.dll
ARMAP-NEXT: mangledfunc in test.dll
+ARMAP-NEXT: manglednonamefunc in test.dll
+ARMAP-NEXT: nonamefunc in test.dll
ARMAP-NEXT: test_NULL_THUNK_DATA in test.dll
RUN: llvm-readobj test.lib | FileCheck -check-prefix=READOBJ %s
@@ -95,6 +103,25 @@ READOBJ-NEXT: Type: data
READOBJ-NEXT: Name type: name
READOBJ-NEXT: Export name: dataexp
READOBJ-NEXT: Symbol: __imp_dataexp
+READOBJ-EMPTY:
+READOBJ-NEXT: File: test.dll
+READOBJ-NEXT: Format: COFF-import-file-ARM64EC
+READOBJ-NEXT: Type: code
+READOBJ-NEXT: Name type: ordinal
+READOBJ-NEXT: Symbol: __imp_nonamefunc
+READOBJ-NEXT: Symbol: nonamefunc
+READOBJ-NEXT: Symbol: __imp_aux_nonamefunc
+READOBJ-NEXT: Symbol: #nonamefunc
+READOBJ-EMPTY:
+READOBJ-NEXT: File: test.dll
+READOBJ-NEXT: Format: COFF-import-file-ARM64EC
+READOBJ-NEXT: Type: code
+READOBJ-NEXT: Name type: ordinal
+READOBJ-NEXT: Symbol: __imp_manglednonamefunc
+READOBJ-NEXT: Symbol: manglednonamefunc
+READOBJ-NEXT: Symbol: __imp_aux_manglednonamefunc
+READOBJ-NEXT: Symbol: #manglednonamefunc
+
Using -machine:arm64x gives the same output.
RUN: llvm-lib -machine:arm64x -def:test.def -out:testx.lib
@@ -112,22 +139,28 @@ RUN: llvm-nm --print-armap testx.lib | FileCheck -check-prefix=ARMAPX %s
ARMAPX: Archive map
ARMAPX-NEXT: #mangledfunc in test.dll
+ARMAPX-NEXT: #manglednonamefunc in test.dll
ARMAPX-NEXT: ?test_cpp_func@@YAHPEAX@Z in test.dll
ARMAPX-NEXT: __IMPORT_DESCRIPTOR_test in test.dll
ARMAPX-NEXT: __NULL_IMPORT_DESCRIPTOR in test.dll
ARMAPX-NEXT: __imp_#mangledfunc in test.dll
+ARMAPX-NEXT: __imp_#manglednonamefunc in test.dll
ARMAPX-NEXT: __imp_?test_cpp_func@@YAHPEAX@Z in test.dll
ARMAPX-NEXT: __imp_dataexp in test.dll
ARMAPX-NEXT: __imp_expname in test.dll
ARMAPX-NEXT: __imp_funcexp in test.dll
+ARMAPX-NEXT: __imp_nonamefunc in test.dll
ARMAPX-NEXT: expname in test.dll
ARMAPX-NEXT: funcexp in test.dll
+ARMAPX-NEXT: nonamefunc in test.dll
ARMAPX-NEXT: test_NULL_THUNK_DATA in test.dll
ARMAPX-EMPTY:
ARMAPX-NEXT: Archive EC map
ARMAPX-NEXT: #expname in test.dll
ARMAPX-NEXT: #funcexp in test.dll
ARMAPX-NEXT: #mangledfunc in test.dll
+ARMAPX-NEXT: #manglednonamefunc in test.dll
+ARMAPX-NEXT: #nonamefunc in test.dll
ARMAPX-NEXT: ?test_cpp_func@@$$hYAHPEAX@Z in test.dll
ARMAPX-NEXT: ?test_cpp_func@@YAHPEAX@Z in test.dll
ARMAPX-NEXT: __IMPORT_DESCRIPTOR_test in test.dll
@@ -137,13 +170,19 @@ ARMAPX-NEXT: __imp_aux_?test_cpp_func@@YAHPEAX@Z in test.dll
ARMAPX-NEXT: __imp_aux_expname in test.dll
ARMAPX-NEXT: __imp_aux_funcexp in test.dll
ARMAPX-NEXT: __imp_aux_mangledfunc in test.dll
+ARMAPX-NEXT: __imp_aux_manglednonamefunc in test.dll
+ARMAPX-NEXT: __imp_aux_nonamefunc in test.dll
ARMAPX-NEXT: __imp_dataexp in test.dll
ARMAPX-NEXT: __imp_expname in test.dll
ARMAPX-NEXT: __imp_funcexp in test.dll
ARMAPX-NEXT: __imp_mangledfunc in test.dll
+ARMAPX-NEXT: __imp_manglednonamefunc in test.dll
+ARMAPX-NEXT: __imp_nonamefunc in test.dll
ARMAPX-NEXT: expname in test.dll
ARMAPX-NEXT: funcexp in test.dll
ARMAPX-NEXT: mangledfunc in test.dll
+ARMAPX-NEXT: manglednonamefunc in test.dll
+ARMAPX-NEXT: nonamefunc in test.dll
ARMAPX-NEXT: test_NULL_THUNK_DATA in test.dll
RUN: llvm-readobj testx.lib | FileCheck -check-prefix=READOBJX %s
@@ -211,6 +250,24 @@ READOBJX-NEXT: Export name: dataexp
READOBJX-NEXT: Symbol: __imp_dataexp
READOBJX-EMPTY:
READOBJX-NEXT: File: test.dll
+READOBJX-NEXT: Format: COFF-import-file-ARM64EC
+READOBJX-NEXT: Type: code
+READOBJX-NEXT: Name type: ordinal
+READOBJX-NEXT: Symbol: __imp_nonamefunc
+READOBJX-NEXT: Symbol: nonamefunc
+READOBJX-NEXT: Symbol: __imp_aux_nonamefunc
+READOBJX-NEXT: Symbol: #nonamefunc
+READOBJX-EMPTY:
+READOBJX-NEXT: File: test.dll
+READOBJX-NEXT: Format: COFF-import-file-ARM64EC
+READOBJX-NEXT: Type: code
+READOBJX-NEXT: Name type: ordinal
+READOBJX-NEXT: Symbol: __imp_manglednonamefunc
+READOBJX-NEXT: Symbol: manglednonamefunc
+READOBJX-NEXT: Symbol: __imp_aux_manglednonamefunc
+READOBJX-NEXT: Symbol: #manglednonamefunc
+READOBJX-EMPTY:
+READOBJX-NEXT: File: test.dll
READOBJX-NEXT: Format: COFF-import-file-ARM64
READOBJX-NEXT: Type: code
READOBJX-NEXT: Name type: name
@@ -248,6 +305,20 @@ READOBJX-NEXT: Type: data
READOBJX-NEXT: Name type: name
READOBJX-NEXT: Export name: dataexp
READOBJX-NEXT: Symbol: __imp_dataexp
+READOBJX-EMPTY:
+READOBJX-NEXT: File: test.dll
+READOBJX-NEXT: Format: COFF-import-file-ARM64
+READOBJX-NEXT: Type: code
+READOBJX-NEXT: Name type: ordinal
+READOBJX-NEXT: Symbol: __imp_nonamefunc
+READOBJX-NEXT: Symbol: nonamefunc
+READOBJX-EMPTY:
+READOBJX-NEXT: File: test.dll
+READOBJX-NEXT: Format: COFF-import-file-ARM64
+READOBJX-NEXT: Type: code
+READOBJX-NEXT: Name type: ordinal
+READOBJX-NEXT: Symbol: __imp_#manglednonamefunc
+READOBJX-NEXT: Symbol: #manglednonamefunc
RUN: llvm-lib -machine:arm64ec -def:test.def -defArm64Native:test2.def -out:test2.lib
@@ -266,6 +337,8 @@ ARMAPX2-NEXT: Archive EC map
ARMAPX2-NEXT: #expname in test2.dll
ARMAPX2-NEXT: #funcexp in test2.dll
ARMAPX2-NEXT: #mangledfunc in test2.dll
+ARMAPX2-NEXT: #manglednonamefunc in test2.dll
+ARMAPX2-NEXT: #nonamefunc in test2.dll
ARMAPX2-NEXT: ?test_cpp_func@@$$hYAHPEAX@Z in test2.dll
ARMAPX2-NEXT: ?test_cpp_func@@YAHPEAX@Z in test2.dll
ARMAPX2-NEXT: __IMPORT_DESCRIPTOR_test2 in test2.dll
@@ -275,13 +348,19 @@ ARMAPX2-NEXT: __imp_aux_?test_cpp_func@@YAHPEAX@Z in test2.dll
ARMAPX2-NEXT: __imp_aux_expname in test2.dll
ARMAPX2-NEXT: __imp_aux_funcexp in test2.dll
ARMAPX2-NEXT: __imp_aux_mangledfunc in test2.dll
+ARMAPX2-NEXT: __imp_aux_manglednonamefunc in test2.dll
+ARMAPX2-NEXT: __imp_aux_nonamefunc in test2.dll
ARMAPX2-NEXT: __imp_dataexp in test2.dll
ARMAPX2-NEXT: __imp_expname in test2.dll
ARMAPX2-NEXT: __imp_funcexp in test2.dll
ARMAPX2-NEXT: __imp_mangledfunc in test2.dll
+ARMAPX2-NEXT: __imp_manglednonamefunc in test2.dll
+ARMAPX2-NEXT: __imp_nonamefunc in test2.dll
ARMAPX2-NEXT: expname in test2.dll
ARMAPX2-NEXT: funcexp in test2.dll
ARMAPX2-NEXT: mangledfunc in test2.dll
+ARMAPX2-NEXT: manglednonamefunc in test2.dll
+ARMAPX2-NEXT: nonamefunc in test2.dll
ARMAPX2-NEXT: test2_NULL_THUNK_DATA in test2.dll
ARMAPX2: test2.dll:
@@ -312,6 +391,18 @@ ARMAPX2-NEXT: test2.dll:
ARMAPX2-NEXT: 00000000 D __imp_dataexp
ARMAPX2-EMPTY:
ARMAPX2-NEXT: test2.dll:
+ARMAPX2-NEXT: 00000000 T #nonamefunc
+ARMAPX2-NEXT: 00000000 T __imp_aux_nonamefunc
+ARMAPX2-NEXT: 00000000 T __imp_nonamefunc
+ARMAPX2-NEXT: 00000000 T nonamefunc
+ARMAPX2-EMPTY:
+ARMAPX2-NEXT: test2.dll:
+ARMAPX2-NEXT: 00000000 T #manglednonamefunc
+ARMAPX2-NEXT: 00000000 T __imp_aux_manglednonamefunc
+ARMAPX2-NEXT: 00000000 T __imp_manglednonamefunc
+ARMAPX2-NEXT: 00000000 T manglednonamefunc
+ARMAPX2-EMPTY:
+ARMAPX2-NEXT: test2.dll:
ARMAPX2-NEXT: 00000000 T __imp_otherfunc
ARMAPX2-NEXT: 00000000 T otherfunc
@@ -406,6 +497,8 @@ EXPORTS
?test_cpp_func@@YAHPEAX@Z
expname=impname
dataexp DATA
+ nonamefunc @1 NONAME
+ #manglednonamefunc @2 NONAME
#--- test2.def
LIBRARY test2.dll
diff --git a/llvm/unittests/Object/GOFFObjectFileTest.cpp b/llvm/unittests/Object/GOFFObjectFileTest.cpp
index 734dac6b8507..69f60d016a80 100644
--- a/llvm/unittests/Object/GOFFObjectFileTest.cpp
+++ b/llvm/unittests/Object/GOFFObjectFileTest.cpp
@@ -502,3 +502,100 @@ TEST(GOFFObjectFileTest, InvalidERSymbolType) {
FailedWithMessage("ESD record 1 has unknown Executable type 0x03"));
}
}
+
+TEST(GOFFObjectFileTest, TXTConstruct) {
+ char GOFFData[GOFF::RecordLength * 6] = {};
+
+ // HDR record.
+ GOFFData[0] = 0x03;
+ GOFFData[1] = 0xF0;
+ GOFFData[50] = 0x01;
+
+ // ESD record.
+ GOFFData[GOFF::RecordLength] = 0x03;
+ GOFFData[GOFF::RecordLength + 7] = 0x01; // ESDID.
+ GOFFData[GOFF::RecordLength + 71] = 0x05; // Size of symbol name.
+ GOFFData[GOFF::RecordLength + 72] = 0xa5; // Symbol name is v.
+ GOFFData[GOFF::RecordLength + 73] = 0x81; // Symbol name is a.
+ GOFFData[GOFF::RecordLength + 74] = 0x99; // Symbol name is r.
+ GOFFData[GOFF::RecordLength + 75] = 0x7b; // Symbol name is #.
+ GOFFData[GOFF::RecordLength + 76] = 0x83; // Symbol name is c.
+
+ // ESD record.
+ GOFFData[GOFF::RecordLength * 2] = 0x03;
+ GOFFData[GOFF::RecordLength * 2 + 3] = 0x01;
+ GOFFData[GOFF::RecordLength * 2 + 7] = 0x02; // ESDID.
+ GOFFData[GOFF::RecordLength * 2 + 11] = 0x01; // Parent ESDID.
+ GOFFData[GOFF::RecordLength * 2 + 27] = 0x08; // Length.
+ GOFFData[GOFF::RecordLength * 2 + 40] = 0x01; // Name Space ID.
+ GOFFData[GOFF::RecordLength * 2 + 41] = 0x80;
+ GOFFData[GOFF::RecordLength * 2 + 60] = 0x04; // Size of symbol name.
+ GOFFData[GOFF::RecordLength * 2 + 61] = 0x04; // Size of symbol name.
+ GOFFData[GOFF::RecordLength * 2 + 63] = 0x0a; // Size of symbol name.
+ GOFFData[GOFF::RecordLength * 2 + 66] = 0x03; // Size of symbol name.
+ GOFFData[GOFF::RecordLength * 2 + 71] = 0x08; // Size of symbol name.
+ GOFFData[GOFF::RecordLength * 2 + 72] = 0xc3; // Symbol name is c.
+ GOFFData[GOFF::RecordLength * 2 + 73] = 0x6d; // Symbol name is _.
+ GOFFData[GOFF::RecordLength * 2 + 74] = 0xc3; // Symbol name is c.
+ GOFFData[GOFF::RecordLength * 2 + 75] = 0xd6; // Symbol name is o.
+ GOFFData[GOFF::RecordLength * 2 + 76] = 0xc4; // Symbol name is D.
+ GOFFData[GOFF::RecordLength * 2 + 77] = 0xc5; // Symbol name is E.
+ GOFFData[GOFF::RecordLength * 2 + 78] = 0xf6; // Symbol name is 6.
+ GOFFData[GOFF::RecordLength * 2 + 79] = 0xf4; // Symbol name is 4.
+
+ // ESD record.
+ GOFFData[GOFF::RecordLength * 3] = 0x03;
+ GOFFData[GOFF::RecordLength * 3 + 3] = 0x02;
+ GOFFData[GOFF::RecordLength * 3 + 7] = 0x03; // ESDID.
+ GOFFData[GOFF::RecordLength * 3 + 11] = 0x02; // Parent ESDID.
+ GOFFData[GOFF::RecordLength * 3 + 71] = 0x05; // Size of symbol name.
+ GOFFData[GOFF::RecordLength * 3 + 72] = 0xa5; // Symbol name is v.
+ GOFFData[GOFF::RecordLength * 3 + 73] = 0x81; // Symbol name is a.
+ GOFFData[GOFF::RecordLength * 3 + 74] = 0x99; // Symbol name is r.
+ GOFFData[GOFF::RecordLength * 3 + 75] = 0x7b; // Symbol name is #.
+ GOFFData[GOFF::RecordLength * 3 + 76] = 0x83; // Symbol name is c.
+
+ // TXT record.
+ GOFFData[GOFF::RecordLength * 4] = 0x03;
+ GOFFData[GOFF::RecordLength * 4 + 1] = 0x10;
+ GOFFData[GOFF::RecordLength * 4 + 7] = 0x02;
+ GOFFData[GOFF::RecordLength * 4 + 23] = 0x08; // Data Length.
+ GOFFData[GOFF::RecordLength * 4 + 24] = 0x12;
+ GOFFData[GOFF::RecordLength * 4 + 25] = 0x34;
+ GOFFData[GOFF::RecordLength * 4 + 26] = 0x56;
+ GOFFData[GOFF::RecordLength * 4 + 27] = 0x78;
+ GOFFData[GOFF::RecordLength * 4 + 28] = 0x9a;
+ GOFFData[GOFF::RecordLength * 4 + 29] = 0xbc;
+ GOFFData[GOFF::RecordLength * 4 + 30] = 0xde;
+ GOFFData[GOFF::RecordLength * 4 + 31] = 0xf0;
+
+ // END record.
+ GOFFData[GOFF::RecordLength * 5] = 0x03;
+ GOFFData[GOFF::RecordLength * 5 + 1] = 0x40;
+ GOFFData[GOFF::RecordLength * 5 + 11] = 0x06;
+
+ StringRef Data(GOFFData, GOFF::RecordLength * 6);
+
+ Expected<std::unique_ptr<ObjectFile>> GOFFObjOrErr =
+ object::ObjectFile::createGOFFObjectFile(
+ MemoryBufferRef(Data, "dummyGOFF"));
+
+ ASSERT_THAT_EXPECTED(GOFFObjOrErr, Succeeded());
+
+ GOFFObjectFile *GOFFObj = dyn_cast<GOFFObjectFile>((*GOFFObjOrErr).get());
+ auto Symbols = GOFFObj->symbols();
+ ASSERT_EQ(std::distance(Symbols.begin(), Symbols.end()), 1);
+ SymbolRef Symbol = *Symbols.begin();
+ Expected<StringRef> SymbolNameOrErr = GOFFObj->getSymbolName(Symbol);
+ ASSERT_THAT_EXPECTED(SymbolNameOrErr, Succeeded());
+ StringRef SymbolName = SymbolNameOrErr.get();
+ EXPECT_EQ(SymbolName, "var#c");
+
+ auto Sections = GOFFObj->sections();
+ ASSERT_EQ(std::distance(Sections.begin(), Sections.end()), 1);
+ SectionRef Section = *Sections.begin();
+ Expected<StringRef> SectionContent = Section.getContents();
+ ASSERT_THAT_EXPECTED(SectionContent, Succeeded());
+ StringRef Contents = SectionContent.get();
+ EXPECT_EQ(Contents, "\x12\x34\x56\x78\x9a\xbc\xde\xf0");
+}
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index a7d0b1687a7f..2c72a7229b52 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -2347,13 +2347,6 @@ AArch64ExtensionDependenciesBaseArchTestParams
{},
{"aes", "sha2", "sha3", "sm4"}},
- // +sve implies +f32mm if the base architecture is v8.6A+ or v9.1A+, but
- // not earlier architectures.
- {AArch64::ARMV8_5A, {"sve"}, {"sve"}, {"f32mm"}},
- {AArch64::ARMV9A, {"sve"}, {"sve"}, {"f32mm"}},
- {AArch64::ARMV8_6A, {"sve"}, {"sve", "f32mm"}, {}},
- {AArch64::ARMV9_1A, {"sve"}, {"sve", "f32mm"}, {}},
-
// +fp16 implies +fp16fml for v8.4A+, but not v9.0-A+
{AArch64::ARMV8_3A, {"fp16"}, {"fullfp16"}, {"fp16fml"}},
{AArch64::ARMV9A, {"fp16"}, {"fullfp16"}, {"fp16fml"}},
@@ -2520,10 +2513,10 @@ AArch64ExtensionDependenciesBaseCPUTestParams
{}},
{"cortex-a520",
{},
- {"v9.2a", "bf16", "crc", "dotprod", "f32mm", "flagm",
- "fp-armv8", "fullfp16", "fp16fml", "i8mm", "lse", "mte",
- "pauth", "perfmon", "predres", "ras", "rcpc", "rdm",
- "sb", "neon", "ssbs", "sve", "sve2-bitperm", "sve2"},
+ {"v9.2a", "bf16", "crc", "dotprod", "flagm", "fp-armv8",
+ "fullfp16", "fp16fml", "i8mm", "lse", "mte", "pauth",
+ "perfmon", "predres", "ras", "rcpc", "rdm", "sb",
+ "neon", "ssbs", "sve", "sve2-bitperm", "sve2"},
{}},
// Negative modifiers
diff --git a/llvm/utils/TableGen/Common/CMakeLists.txt b/llvm/utils/TableGen/Common/CMakeLists.txt
index 0440f027f286..c31ed5a1de69 100644
--- a/llvm/utils/TableGen/Common/CMakeLists.txt
+++ b/llvm/utils/TableGen/Common/CMakeLists.txt
@@ -12,10 +12,12 @@ set(LLVM_LINK_COMPONENTS
add_llvm_library(LLVMTableGenCommon STATIC OBJECT EXCLUDE_FROM_ALL
GlobalISel/CodeExpander.cpp
+ GlobalISel/CombinerUtils.cpp
GlobalISel/CXXPredicates.cpp
GlobalISel/GlobalISelMatchTable.cpp
GlobalISel/GlobalISelMatchTableExecutorEmitter.cpp
GlobalISel/MatchDataInfo.cpp
+ GlobalISel/PatternParser.cpp
GlobalISel/Patterns.cpp
AsmWriterInst.cpp
diff --git a/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.cpp b/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.cpp
new file mode 100644
index 000000000000..37e630605095
--- /dev/null
+++ b/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.cpp
@@ -0,0 +1,23 @@
+//===- CombinerUtils.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CombinerUtils.h"
+#include "llvm/ADT/StringSet.h"
+
+namespace llvm {
+
+StringRef insertStrRef(StringRef S) {
+ if (S.empty())
+ return {};
+
+ static StringSet<> Pool;
+ auto [It, Inserted] = Pool.insert(S);
+ return It->getKey();
+}
+
+} // namespace llvm
diff --git a/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.h b/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.h
index 8cb2514a10e8..82a64c63edbd 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.h
+++ b/llvm/utils/TableGen/Common/GlobalISel/CombinerUtils.h
@@ -65,6 +65,10 @@ inline const DagInit *getDagWithOperatorOfSubClass(const Init &N,
return I;
return nullptr;
}
+
+/// Copies a StringRef into a static pool to preserve it.
+StringRef insertStrRef(StringRef S);
+
} // namespace llvm
#endif
diff --git a/llvm/utils/TableGen/Common/GlobalISel/PatternParser.cpp b/llvm/utils/TableGen/Common/GlobalISel/PatternParser.cpp
new file mode 100644
index 000000000000..1d6c4c73a264
--- /dev/null
+++ b/llvm/utils/TableGen/Common/GlobalISel/PatternParser.cpp
@@ -0,0 +1,462 @@
+//===- PatternParser.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Common/GlobalISel/PatternParser.h"
+#include "Basic/CodeGenIntrinsics.h"
+#include "Common/CodeGenTarget.h"
+#include "Common/GlobalISel/CombinerUtils.h"
+#include "Common/GlobalISel/Patterns.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/SaveAndRestore.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+
+namespace llvm {
+namespace gi {
+static constexpr StringLiteral MIFlagsEnumClassName = "MIFlagEnum";
+
+namespace {
+class PrettyStackTraceParse : public PrettyStackTraceEntry {
+ const Record &Def;
+
+public:
+ PrettyStackTraceParse(const Record &Def) : Def(Def) {}
+
+ void print(raw_ostream &OS) const override {
+ if (Def.isSubClassOf("GICombineRule"))
+ OS << "Parsing GICombineRule '" << Def.getName() << '\'';
+ else if (Def.isSubClassOf(PatFrag::ClassName))
+ OS << "Parsing " << PatFrag::ClassName << " '" << Def.getName() << '\'';
+ else
+ OS << "Parsing '" << Def.getName() << '\'';
+ OS << '\n';
+ }
+};
+} // namespace
+
+bool PatternParser::parsePatternList(
+ const DagInit &List,
+ function_ref<bool(std::unique_ptr<Pattern>)> ParseAction,
+ StringRef Operator, StringRef AnonPatNamePrefix) {
+ if (List.getOperatorAsDef(DiagLoc)->getName() != Operator) {
+ PrintError(DiagLoc, "Expected " + Operator + " operator");
+ return false;
+ }
+
+ if (List.getNumArgs() == 0) {
+ PrintError(DiagLoc, Operator + " pattern list is empty");
+ return false;
+ }
+
+ // The match section consists of a list of matchers and predicates. Parse each
+ // one and add the equivalent GIMatchDag nodes, predicates, and edges.
+ for (unsigned I = 0; I < List.getNumArgs(); ++I) {
+ Init *Arg = List.getArg(I);
+ std::string Name = List.getArgName(I)
+ ? List.getArgName(I)->getValue().str()
+ : ("__" + AnonPatNamePrefix + "_" + Twine(I)).str();
+
+ if (auto Pat = parseInstructionPattern(*Arg, Name)) {
+ if (!ParseAction(std::move(Pat)))
+ return false;
+ continue;
+ }
+
+ if (auto Pat = parseWipMatchOpcodeMatcher(*Arg, Name)) {
+ if (!ParseAction(std::move(Pat)))
+ return false;
+ continue;
+ }
+
+ // Parse arbitrary C++ code
+ if (const auto *StringI = dyn_cast<StringInit>(Arg)) {
+ auto CXXPat = std::make_unique<CXXPattern>(*StringI, insertStrRef(Name));
+ if (!ParseAction(std::move(CXXPat)))
+ return false;
+ continue;
+ }
+
+ PrintError(DiagLoc,
+ "Failed to parse pattern: '" + Arg->getAsString() + '\'');
+ return false;
+ }
+
+ return true;
+}
+
+static const CodeGenInstruction &
+getInstrForIntrinsic(const CodeGenTarget &CGT, const CodeGenIntrinsic *I) {
+ StringRef Opc;
+ if (I->isConvergent) {
+ Opc = I->hasSideEffects ? "G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS"
+ : "G_INTRINSIC_CONVERGENT";
+ } else {
+ Opc = I->hasSideEffects ? "G_INTRINSIC_W_SIDE_EFFECTS" : "G_INTRINSIC";
+ }
+
+ RecordKeeper &RK = I->TheDef->getRecords();
+ return CGT.getInstruction(RK.getDef(Opc));
+}
+
+static const CodeGenIntrinsic *getCodeGenIntrinsic(Record *R) {
+ // Intrinsics need to have a static lifetime because the match table keeps
+ // references to CodeGenIntrinsic objects.
+ static DenseMap<const Record *, std::unique_ptr<CodeGenIntrinsic>>
+ AllIntrinsics;
+
+ auto &Ptr = AllIntrinsics[R];
+ if (!Ptr)
+ Ptr = std::make_unique<CodeGenIntrinsic>(R, std::vector<Record *>());
+ return Ptr.get();
+}
+
+std::unique_ptr<Pattern>
+PatternParser::parseInstructionPattern(const Init &Arg, StringRef Name) {
+ const DagInit *DagPat = dyn_cast<DagInit>(&Arg);
+ if (!DagPat)
+ return nullptr;
+
+ std::unique_ptr<InstructionPattern> Pat;
+ if (const DagInit *IP = getDagWithOperatorOfSubClass(Arg, "Instruction")) {
+ auto &Instr = CGT.getInstruction(IP->getOperatorAsDef(DiagLoc));
+ Pat =
+ std::make_unique<CodeGenInstructionPattern>(Instr, insertStrRef(Name));
+ } else if (const DagInit *IP =
+ getDagWithOperatorOfSubClass(Arg, "Intrinsic")) {
+ Record *TheDef = IP->getOperatorAsDef(DiagLoc);
+ const CodeGenIntrinsic *Intrin = getCodeGenIntrinsic(TheDef);
+ const CodeGenInstruction &Instr = getInstrForIntrinsic(CGT, Intrin);
+ Pat =
+ std::make_unique<CodeGenInstructionPattern>(Instr, insertStrRef(Name));
+ cast<CodeGenInstructionPattern>(*Pat).setIntrinsic(Intrin);
+ } else if (const DagInit *PFP =
+ getDagWithOperatorOfSubClass(Arg, PatFrag::ClassName)) {
+ const Record *Def = PFP->getOperatorAsDef(DiagLoc);
+ const PatFrag *PF = parsePatFrag(Def);
+ if (!PF)
+ return nullptr; // Already diagnosed by parsePatFrag
+ Pat = std::make_unique<PatFragPattern>(*PF, insertStrRef(Name));
+ } else if (const DagInit *BP =
+ getDagWithOperatorOfSubClass(Arg, BuiltinPattern::ClassName)) {
+ Pat = std::make_unique<BuiltinPattern>(*BP->getOperatorAsDef(DiagLoc),
+ insertStrRef(Name));
+ } else
+ return nullptr;
+
+ for (unsigned K = 0; K < DagPat->getNumArgs(); ++K) {
+ Init *Arg = DagPat->getArg(K);
+ if (auto *DagArg = getDagWithSpecificOperator(*Arg, "MIFlags")) {
+ if (!parseInstructionPatternMIFlags(*Pat, DagArg))
+ return nullptr;
+ continue;
+ }
+
+ if (!parseInstructionPatternOperand(*Pat, Arg, DagPat->getArgName(K)))
+ return nullptr;
+ }
+
+ if (!Pat->checkSemantics(DiagLoc))
+ return nullptr;
+
+ return std::move(Pat);
+}
+
+std::unique_ptr<Pattern>
+PatternParser::parseWipMatchOpcodeMatcher(const Init &Arg, StringRef Name) {
+ const DagInit *Matcher = getDagWithSpecificOperator(Arg, "wip_match_opcode");
+ if (!Matcher)
+ return nullptr;
+
+ if (Matcher->getNumArgs() == 0) {
+ PrintError(DiagLoc, "Empty wip_match_opcode");
+ return nullptr;
+ }
+
+ // Each argument is an opcode that can match.
+ auto Result = std::make_unique<AnyOpcodePattern>(insertStrRef(Name));
+ for (const auto &Arg : Matcher->getArgs()) {
+ Record *OpcodeDef = getDefOfSubClass(*Arg, "Instruction");
+ if (OpcodeDef) {
+ Result->addOpcode(&CGT.getInstruction(OpcodeDef));
+ continue;
+ }
+
+ PrintError(DiagLoc, "Arguments to wip_match_opcode must be instructions");
+ return nullptr;
+ }
+
+ return std::move(Result);
+}
+
+bool PatternParser::parseInstructionPatternOperand(InstructionPattern &IP,
+ const Init *OpInit,
+ const StringInit *OpName) {
+ const auto ParseErr = [&]() {
+ PrintError(DiagLoc,
+ "cannot parse operand '" + OpInit->getAsUnquotedString() + "' ");
+ if (OpName)
+ PrintNote(DiagLoc,
+ "operand name is '" + OpName->getAsUnquotedString() + '\'');
+ return false;
+ };
+
+ // untyped immediate, e.g. 0
+ if (const auto *IntImm = dyn_cast<IntInit>(OpInit)) {
+ std::string Name = OpName ? OpName->getAsUnquotedString() : "";
+ IP.addOperand(IntImm->getValue(), insertStrRef(Name), PatternType());
+ return true;
+ }
+
+ // typed immediate, e.g. (i32 0)
+ if (const auto *DagOp = dyn_cast<DagInit>(OpInit)) {
+ if (DagOp->getNumArgs() != 1)
+ return ParseErr();
+
+ const Record *TyDef = DagOp->getOperatorAsDef(DiagLoc);
+ auto ImmTy = PatternType::get(DiagLoc, TyDef,
+ "cannot parse immediate '" +
+ DagOp->getAsUnquotedString() + '\'');
+ if (!ImmTy)
+ return false;
+
+ if (!IP.hasAllDefs()) {
+ PrintError(DiagLoc, "out operand of '" + IP.getInstName() +
+ "' cannot be an immediate");
+ return false;
+ }
+
+ const auto *Val = dyn_cast<IntInit>(DagOp->getArg(0));
+ if (!Val)
+ return ParseErr();
+
+ std::string Name = OpName ? OpName->getAsUnquotedString() : "";
+ IP.addOperand(Val->getValue(), insertStrRef(Name), *ImmTy);
+ return true;
+ }
+
+ // Typed operand e.g. $x/$z in (G_FNEG $x, $z)
+ if (auto *DefI = dyn_cast<DefInit>(OpInit)) {
+ if (!OpName) {
+ PrintError(DiagLoc, "expected an operand name after '" +
+ OpInit->getAsString() + '\'');
+ return false;
+ }
+ const Record *Def = DefI->getDef();
+ auto Ty = PatternType::get(DiagLoc, Def, "cannot parse operand type");
+ if (!Ty)
+ return false;
+ IP.addOperand(insertStrRef(OpName->getAsUnquotedString()), *Ty);
+ return true;
+ }
+
+ // Untyped operand e.g. $x/$z in (G_FNEG $x, $z)
+ if (isa<UnsetInit>(OpInit)) {
+ assert(OpName && "Unset w/ no OpName?");
+ IP.addOperand(insertStrRef(OpName->getAsUnquotedString()), PatternType());
+ return true;
+ }
+
+ return ParseErr();
+}
+
+bool PatternParser::parseInstructionPatternMIFlags(InstructionPattern &IP,
+ const DagInit *Op) {
+ auto *CGIP = dyn_cast<CodeGenInstructionPattern>(&IP);
+ if (!CGIP) {
+ PrintError(DiagLoc,
+ "matching/writing MIFlags is only allowed on CodeGenInstruction "
+ "patterns");
+ return false;
+ }
+
+ const auto CheckFlagEnum = [&](const Record *R) {
+ if (!R->isSubClassOf(MIFlagsEnumClassName)) {
+ PrintError(DiagLoc, "'" + R->getName() + "' is not a subclass of '" +
+ MIFlagsEnumClassName + "'");
+ return false;
+ }
+
+ return true;
+ };
+
+ if (CGIP->getMIFlagsInfo()) {
+ PrintError(DiagLoc, "MIFlags can only be present once on an instruction");
+ return false;
+ }
+
+ auto &FI = CGIP->getOrCreateMIFlagsInfo();
+ for (unsigned K = 0; K < Op->getNumArgs(); ++K) {
+ const Init *Arg = Op->getArg(K);
+
+ // Match/set a flag: (MIFlags FmNoNans)
+ if (const auto *Def = dyn_cast<DefInit>(Arg)) {
+ const Record *R = Def->getDef();
+ if (!CheckFlagEnum(R))
+ return false;
+
+ FI.addSetFlag(R);
+ continue;
+ }
+
+ // Do not match a flag/unset a flag: (MIFlags (not FmNoNans))
+ if (const DagInit *NotDag = getDagWithSpecificOperator(*Arg, "not")) {
+ for (const Init *NotArg : NotDag->getArgs()) {
+ const DefInit *DefArg = dyn_cast<DefInit>(NotArg);
+ if (!DefArg) {
+ PrintError(DiagLoc, "cannot parse '" + NotArg->getAsUnquotedString() +
+ "': expected a '" + MIFlagsEnumClassName +
+ "'");
+ return false;
+ }
+
+ const Record *R = DefArg->getDef();
+ if (!CheckFlagEnum(R))
+ return false;
+
+ FI.addUnsetFlag(R);
+ continue;
+ }
+
+ continue;
+ }
+
+ // Copy flags from a matched instruction: (MIFlags $mi)
+ if (isa<UnsetInit>(Arg)) {
+ FI.addCopyFlag(insertStrRef(Op->getArgName(K)->getAsUnquotedString()));
+ continue;
+ }
+ }
+
+ return true;
+}
+
+std::unique_ptr<PatFrag> PatternParser::parsePatFragImpl(const Record *Def) {
+ auto StackTrace = PrettyStackTraceParse(*Def);
+ if (!Def->isSubClassOf(PatFrag::ClassName))
+ return nullptr;
+
+ const DagInit *Ins = Def->getValueAsDag("InOperands");
+ if (Ins->getOperatorAsDef(Def->getLoc())->getName() != "ins") {
+ PrintError(Def, "expected 'ins' operator for " + PatFrag::ClassName +
+ " in operands list");
+ return nullptr;
+ }
+
+ const DagInit *Outs = Def->getValueAsDag("OutOperands");
+ if (Outs->getOperatorAsDef(Def->getLoc())->getName() != "outs") {
+ PrintError(Def, "expected 'outs' operator for " + PatFrag::ClassName +
+ " out operands list");
+ return nullptr;
+ }
+
+ auto Result = std::make_unique<PatFrag>(*Def);
+ if (!parsePatFragParamList(*Outs, [&](StringRef Name, unsigned Kind) {
+ Result->addOutParam(insertStrRef(Name), (PatFrag::ParamKind)Kind);
+ return true;
+ }))
+ return nullptr;
+
+ if (!parsePatFragParamList(*Ins, [&](StringRef Name, unsigned Kind) {
+ Result->addInParam(insertStrRef(Name), (PatFrag::ParamKind)Kind);
+ return true;
+ }))
+ return nullptr;
+
+ const ListInit *Alts = Def->getValueAsListInit("Alternatives");
+ unsigned AltIdx = 0;
+ for (const Init *Alt : *Alts) {
+ const auto *PatDag = dyn_cast<DagInit>(Alt);
+ if (!PatDag) {
+ PrintError(Def, "expected dag init for PatFrag pattern alternative");
+ return nullptr;
+ }
+
+ PatFrag::Alternative &A = Result->addAlternative();
+ const auto AddPat = [&](std::unique_ptr<Pattern> Pat) {
+ A.Pats.push_back(std::move(Pat));
+ return true;
+ };
+
+ SaveAndRestore<ArrayRef<SMLoc>> DiagLocSAR(DiagLoc, Def->getLoc());
+ if (!parsePatternList(
+ *PatDag, AddPat, "pattern",
+ /*AnonPatPrefix*/
+ (Def->getName() + "_alt" + Twine(AltIdx++) + "_pattern").str()))
+ return nullptr;
+ }
+
+ if (!Result->buildOperandsTables() || !Result->checkSemantics())
+ return nullptr;
+
+ return Result;
+}
+
+bool PatternParser::parsePatFragParamList(
+ const DagInit &OpsList,
+ function_ref<bool(StringRef, unsigned)> ParseAction) {
+ for (unsigned K = 0; K < OpsList.getNumArgs(); ++K) {
+ const StringInit *Name = OpsList.getArgName(K);
+ const Init *Ty = OpsList.getArg(K);
+
+ if (!Name) {
+ PrintError(DiagLoc, "all operands must be named'");
+ return false;
+ }
+ const std::string NameStr = Name->getAsUnquotedString();
+
+ PatFrag::ParamKind OpKind;
+ if (isSpecificDef(*Ty, "gi_imm"))
+ OpKind = PatFrag::PK_Imm;
+ else if (isSpecificDef(*Ty, "root"))
+ OpKind = PatFrag::PK_Root;
+ else if (isa<UnsetInit>(Ty) ||
+ isSpecificDef(*Ty, "gi_mo")) // no type = gi_mo.
+ OpKind = PatFrag::PK_MachineOperand;
+ else {
+ PrintError(
+ DiagLoc,
+ '\'' + NameStr +
+ "' operand type was expected to be 'root', 'gi_imm' or 'gi_mo'");
+ return false;
+ }
+
+ if (!ParseAction(NameStr, (unsigned)OpKind))
+ return false;
+ }
+
+ return true;
+}
+
+const PatFrag *PatternParser::parsePatFrag(const Record *Def) {
+ // Cache already parsed PatFrags to avoid doing extra work.
+ static DenseMap<const Record *, std::unique_ptr<PatFrag>> ParsedPatFrags;
+
+ auto It = ParsedPatFrags.find(Def);
+ if (It != ParsedPatFrags.end()) {
+ SeenPatFrags.insert(It->second.get());
+ return It->second.get();
+ }
+
+ std::unique_ptr<PatFrag> NewPatFrag = parsePatFragImpl(Def);
+ if (!NewPatFrag) {
+ PrintError(Def, "Could not parse " + PatFrag::ClassName + " '" +
+ Def->getName() + "'");
+ // Put a nullptr in the map so we don't attempt parsing this again.
+ ParsedPatFrags[Def] = nullptr;
+ return nullptr;
+ }
+
+ const auto *Res = NewPatFrag.get();
+ ParsedPatFrags[Def] = std::move(NewPatFrag);
+ SeenPatFrags.insert(Res);
+ return Res;
+}
+
+} // namespace gi
+} // namespace llvm
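A design choice worth noting in the moved parser: parsePatFrag caches results in a function-local static map and records failures as nullptr entries, so a malformed PatFrag is parsed and diagnosed only once even when many rules reference it. A stripped-down sketch of that negative-caching idiom (illustrative names only, not APIs added by this patch):

    #include "llvm/ADT/DenseMap.h"
    #include <memory>

    // Key/Value stand in for const Record * and PatFrag.
    template <typename Key, typename Value>
    const Value *parseCached(const Key *K,
                             std::unique_ptr<Value> (*ParseImpl)(const Key *)) {
      static llvm::DenseMap<const Key *, std::unique_ptr<Value>> Cache;
      auto It = Cache.find(K);
      if (It != Cache.end())
        return It->second.get();      // hit; nullptr if a previous parse failed
      std::unique_ptr<Value> Parsed = ParseImpl(K);
      const Value *Res = Parsed.get();
      Cache[K] = std::move(Parsed);   // cache success and failure alike
      return Res;
    }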
diff --git a/llvm/utils/TableGen/Common/GlobalISel/PatternParser.h b/llvm/utils/TableGen/Common/GlobalISel/PatternParser.h
new file mode 100644
index 000000000000..cd6f524075cd
--- /dev/null
+++ b/llvm/utils/TableGen/Common/GlobalISel/PatternParser.h
@@ -0,0 +1,118 @@
+//===- PatternParser.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Contains tools to parse MIR patterns from TableGen DAG elements.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_GLOBALISEL_PATTERNPARSER_H
+#define LLVM_UTILS_GLOBALISEL_PATTERNPARSER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/SMLoc.h"
+#include <memory>
+
+namespace llvm {
+class CodeGenTarget;
+class DagInit;
+class Init;
+class Record;
+class StringRef;
+class StringInit;
+
+namespace gi {
+class InstructionPattern;
+class Pattern;
+class PatFrag;
+
+/// Helper class to parse MIR Pattern lists.
+///
+/// e.g., `(match (G_FADD $x, $y, $z), (G_FNEG $y, $z))`
+class PatternParser {
+ const CodeGenTarget &CGT;
+ ArrayRef<SMLoc> DiagLoc;
+
+ mutable SmallPtrSet<const PatFrag *, 2> SeenPatFrags;
+
+public:
+ PatternParser(const CodeGenTarget &CGT, ArrayRef<SMLoc> DiagLoc)
+ : CGT(CGT), DiagLoc(DiagLoc) {}
+
+ /// Parses a list of patterns such as:
+ /// (Operator (Pattern1 ...), (Pattern2 ...))
+ /// \param List DagInit of the expected pattern list.
+  /// \param ParseAction Callback to handle a successfully parsed pattern.
+ /// \param Operator The name of the operator, e.g. "match"
+ /// \param AnonPatNamePrefix Prefix for anonymous pattern names.
+ /// \return true on success, false on failure.
+ bool
+ parsePatternList(const DagInit &List,
+ function_ref<bool(std::unique_ptr<Pattern>)> ParseAction,
+ StringRef Operator, StringRef AnonPatNamePrefix);
+
+ /// \returns all PatFrags encountered by this PatternParser.
+ const auto &getSeenPatFrags() const { return SeenPatFrags; }
+
+private:
+ /// Parse any InstructionPattern from a TableGen Init.
+ /// \param Arg Init to parse.
+ /// \param PatName Name of the pattern that will be parsed.
+ /// \return the parsed pattern on success, nullptr on failure.
+ std::unique_ptr<Pattern> parseInstructionPattern(const Init &Arg,
+ StringRef PatName);
+
+ /// Parse a WipOpcodeMatcher from a TableGen Init.
+ /// \param Arg Init to parse.
+ /// \param PatName Name of the pattern that will be parsed.
+ /// \return the parsed pattern on success, nullptr on failure.
+ std::unique_ptr<Pattern> parseWipMatchOpcodeMatcher(const Init &Arg,
+ StringRef PatName);
+
+ /// Parses an Operand of an InstructionPattern from a TableGen Init.
+ /// \param IP InstructionPattern for which we're parsing.
+ /// \param OpInit Init to parse.
+ /// \param OpName Name of the operand to parse.
+ /// \return true on success, false on failure.
+ bool parseInstructionPatternOperand(InstructionPattern &IP,
+ const Init *OpInit,
+ const StringInit *OpName);
+
+ /// Parses a MIFlag for an InstructionPattern from a TableGen Init.
+ /// \param IP InstructionPattern for which we're parsing.
+ /// \param Op Init to parse.
+ /// \return true on success, false on failure.
+ bool parseInstructionPatternMIFlags(InstructionPattern &IP,
+ const DagInit *Op);
+
+ /// (Uncached) PatFrag parsing implementation.
+  /// \param Def PatFrag def to parse.
+ /// \return the parsed PatFrag on success, nullptr on failure.
+ std::unique_ptr<PatFrag> parsePatFragImpl(const Record *Def);
+
+ /// Parses the in or out parameter list of a PatFrag.
+ /// \param OpsList Init to parse.
+ /// \param ParseAction Callback on successful parse, with the name of
+ /// the parameter and its \ref PatFrag::ParamKind
+ /// \return true on success, false on failure.
+ bool
+ parsePatFragParamList(const DagInit &OpsList,
+ function_ref<bool(StringRef, unsigned)> ParseAction);
+
+ /// Cached PatFrag parser. This avoids duplicate work by keeping track of
+ /// already-parsed PatFrags.
+  /// \param Def PatFrag def to parse.
+ /// \return the parsed PatFrag on success, nullptr on failure.
+ const PatFrag *parsePatFrag(const Record *Def);
+};
+
+} // namespace gi
+} // namespace llvm
+
+#endif
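To show how the new class is driven, the GlobalISelCombinerEmitter.cpp changes below construct a PatternParser alongside the other CombineRuleBuilder members and forward the Match/Apply DAGs to it. A condensed C++ sketch of that call shape (RuleDef, CGT, and the pattern list are assumed to be in scope, as they are in the emitter):

    // Condensed from the call sites later in this patch; not a standalone program.
    std::vector<std::unique_ptr<llvm::gi::Pattern>> MatchPats;
    llvm::gi::PatternParser Parser(CGT, RuleDef.getLoc());
    bool Ok = Parser.parsePatternList(
        *RuleDef.getValueAsDag("Match"),
        [&](std::unique_ptr<llvm::gi::Pattern> Pat) {
          MatchPats.push_back(std::move(Pat)); // take ownership of each parsed pattern
          return true;                         // returning false aborts the list
        },
        /*Operator=*/"match",
        /*AnonPatNamePrefix=*/(RuleDef.getName() + "_match").str());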
diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
index 39b9f8a2ae17..1ae6efd4a7d0 100644
--- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
@@ -36,6 +36,7 @@
#include "Common/GlobalISel/GlobalISelMatchTable.h"
#include "Common/GlobalISel/GlobalISelMatchTableExecutorEmitter.h"
#include "Common/GlobalISel/MatchDataInfo.h"
+#include "Common/GlobalISel/PatternParser.h"
#include "Common/GlobalISel/Patterns.h"
#include "Common/SubtargetFeatureInfo.h"
#include "llvm/ADT/APInt.h"
@@ -80,7 +81,6 @@ cl::opt<bool> DebugTypeInfer("gicombiner-debug-typeinfer",
constexpr StringLiteral CXXApplyPrefix = "GICXXCustomAction_CombineApply";
constexpr StringLiteral CXXPredPrefix = "GICXXPred_MI_Predicate_";
-constexpr StringLiteral MIFlagsEnumClassName = "MIFlagEnum";
//===- CodeExpansions Helpers --------------------------------------------===//
@@ -109,17 +109,6 @@ void declareTempRegExpansion(CodeExpansions &CE, unsigned TempRegID,
//===- Misc. Helpers -----------------------------------------------------===//
-/// Copies a StringRef into a static pool to preserve it.
-/// Most Pattern classes use StringRef so we need this.
-StringRef insertStrRef(StringRef S) {
- if (S.empty())
- return {};
-
- static StringSet<> Pool;
- auto [It, Inserted] = Pool.insert(S);
- return It->getKey();
-}
-
template <typename Container> auto keys(Container &&C) {
return map_range(C, [](auto &Entry) -> auto & { return Entry.first; });
}
@@ -639,8 +628,9 @@ public:
SubtargetFeatureInfoMap &SubtargetFeatures,
Record &RuleDef, unsigned ID,
std::vector<RuleMatcher> &OutRMs)
- : CGT(CGT), SubtargetFeatures(SubtargetFeatures), RuleDef(RuleDef),
- RuleID(ID), OutRMs(OutRMs) {}
+ : Parser(CGT, RuleDef.getLoc()), CGT(CGT),
+ SubtargetFeatures(SubtargetFeatures), RuleDef(RuleDef), RuleID(ID),
+ OutRMs(OutRMs) {}
/// Parses all fields in the RuleDef record.
bool parseAll();
@@ -718,26 +708,6 @@ private:
bool buildRuleOperandsTable();
bool parseDefs(const DagInit &Def);
- bool
- parsePatternList(const DagInit &List,
- function_ref<bool(std::unique_ptr<Pattern>)> ParseAction,
- StringRef Operator, ArrayRef<SMLoc> DiagLoc,
- StringRef AnonPatNamePrefix) const;
-
- std::unique_ptr<Pattern> parseInstructionPattern(const Init &Arg,
- StringRef PatName) const;
- std::unique_ptr<Pattern> parseWipMatchOpcodeMatcher(const Init &Arg,
- StringRef PatName) const;
- bool parseInstructionPatternOperand(InstructionPattern &IP,
- const Init *OpInit,
- const StringInit *OpName) const;
- bool parseInstructionPatternMIFlags(InstructionPattern &IP,
- const DagInit *Op) const;
- std::unique_ptr<PatFrag> parsePatFragImpl(const Record *Def) const;
- bool parsePatFragParamList(
- ArrayRef<SMLoc> DiagLoc, const DagInit &OpsList,
- function_ref<bool(StringRef, PatFrag::ParamKind)> ParseAction) const;
- const PatFrag *parsePatFrag(const Record *Def) const;
bool emitMatchPattern(CodeExpansions &CE, const PatternAlternatives &Alts,
const InstructionPattern &IP);
@@ -781,6 +751,7 @@ private:
DenseSet<const Pattern *> &SeenPats, OperandDefLookupFn LookupOperandDef,
OperandMapperFnRef OperandMapper = [](const auto &O) { return O; });
+ PatternParser Parser;
const CodeGenTarget &CGT;
SubtargetFeatureInfoMap &SubtargetFeatures;
Record &RuleDef;
@@ -808,9 +779,6 @@ private:
SmallVector<MatchDataInfo, 2> MatchDatas;
SmallVector<PatternAlternatives, 1> PermutationsToEmit;
-
- // print()/debug-only members.
- mutable SmallPtrSet<const PatFrag *, 2> SeenPatFrags;
};
bool CombineRuleBuilder::parseAll() {
@@ -819,16 +787,16 @@ bool CombineRuleBuilder::parseAll() {
if (!parseDefs(*RuleDef.getValueAsDag("Defs")))
return false;
- if (!parsePatternList(
+ if (!Parser.parsePatternList(
*RuleDef.getValueAsDag("Match"),
[this](auto Pat) { return addMatchPattern(std::move(Pat)); }, "match",
- RuleDef.getLoc(), (RuleDef.getName() + "_match").str()))
+ (RuleDef.getName() + "_match").str()))
return false;
- if (!parsePatternList(
+ if (!Parser.parsePatternList(
*RuleDef.getValueAsDag("Apply"),
[this](auto Pat) { return addApplyPattern(std::move(Pat)); }, "apply",
- RuleDef.getLoc(), (RuleDef.getName() + "_apply").str()))
+ (RuleDef.getName() + "_apply").str()))
return false;
if (!buildRuleOperandsTable() || !typecheckPatterns() || !findRoots() ||
@@ -884,9 +852,10 @@ void CombineRuleBuilder::print(raw_ostream &OS) const {
OS << " )\n";
}
- if (!SeenPatFrags.empty()) {
+ const auto &SeenPFs = Parser.getSeenPatFrags();
+ if (!SeenPFs.empty()) {
OS << " (PatFrags\n";
- for (const auto *PF : SeenPatFrags) {
+ for (const auto *PF : Parser.getSeenPatFrags()) {
PF->print(OS, /*Indent=*/" ");
OS << '\n';
}
@@ -1500,426 +1469,6 @@ bool CombineRuleBuilder::parseDefs(const DagInit &Def) {
return true;
}
-bool CombineRuleBuilder::parsePatternList(
- const DagInit &List,
- function_ref<bool(std::unique_ptr<Pattern>)> ParseAction,
- StringRef Operator, ArrayRef<SMLoc> DiagLoc,
- StringRef AnonPatNamePrefix) const {
- if (List.getOperatorAsDef(RuleDef.getLoc())->getName() != Operator) {
- ::PrintError(DiagLoc, "Expected " + Operator + " operator");
- return false;
- }
-
- if (List.getNumArgs() == 0) {
- ::PrintError(DiagLoc, Operator + " pattern list is empty");
- return false;
- }
-
- // The match section consists of a list of matchers and predicates. Parse each
- // one and add the equivalent GIMatchDag nodes, predicates, and edges.
- for (unsigned I = 0; I < List.getNumArgs(); ++I) {
- Init *Arg = List.getArg(I);
- std::string Name = List.getArgName(I)
- ? List.getArgName(I)->getValue().str()
- : ("__" + AnonPatNamePrefix + "_" + Twine(I)).str();
-
- if (auto Pat = parseInstructionPattern(*Arg, Name)) {
- if (!ParseAction(std::move(Pat)))
- return false;
- continue;
- }
-
- if (auto Pat = parseWipMatchOpcodeMatcher(*Arg, Name)) {
- if (!ParseAction(std::move(Pat)))
- return false;
- continue;
- }
-
- // Parse arbitrary C++ code
- if (const auto *StringI = dyn_cast<StringInit>(Arg)) {
- auto CXXPat = std::make_unique<CXXPattern>(*StringI, insertStrRef(Name));
- if (!ParseAction(std::move(CXXPat)))
- return false;
- continue;
- }
-
- ::PrintError(DiagLoc,
- "Failed to parse pattern: '" + Arg->getAsString() + "'");
- return false;
- }
-
- return true;
-}
-
-static const CodeGenInstruction &
-getInstrForIntrinsic(const CodeGenTarget &CGT, const CodeGenIntrinsic *I) {
- StringRef Opc;
- if (I->isConvergent) {
- Opc = I->hasSideEffects ? "G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS"
- : "G_INTRINSIC_CONVERGENT";
- } else {
- Opc = I->hasSideEffects ? "G_INTRINSIC_W_SIDE_EFFECTS" : "G_INTRINSIC";
- }
-
- RecordKeeper &RK = I->TheDef->getRecords();
- return CGT.getInstruction(RK.getDef(Opc));
-}
-
-static const CodeGenIntrinsic *getCodeGenIntrinsic(Record *R) {
- // Intrinsics need to have a static lifetime because the match table keeps
- // references to CodeGenIntrinsic objects.
- static DenseMap<const Record *, std::unique_ptr<CodeGenIntrinsic>>
- AllIntrinsics;
-
- auto &Ptr = AllIntrinsics[R];
- if (!Ptr)
- Ptr = std::make_unique<CodeGenIntrinsic>(R, std::vector<Record *>());
- return Ptr.get();
-}
-
-std::unique_ptr<Pattern>
-CombineRuleBuilder::parseInstructionPattern(const Init &Arg,
- StringRef Name) const {
- const DagInit *DagPat = dyn_cast<DagInit>(&Arg);
- if (!DagPat)
- return nullptr;
-
- std::unique_ptr<InstructionPattern> Pat;
- if (const DagInit *IP = getDagWithOperatorOfSubClass(Arg, "Instruction")) {
- auto &Instr = CGT.getInstruction(IP->getOperatorAsDef(RuleDef.getLoc()));
- Pat =
- std::make_unique<CodeGenInstructionPattern>(Instr, insertStrRef(Name));
- } else if (const DagInit *IP =
- getDagWithOperatorOfSubClass(Arg, "Intrinsic")) {
- Record *TheDef = IP->getOperatorAsDef(RuleDef.getLoc());
- const CodeGenIntrinsic *Intrin = getCodeGenIntrinsic(TheDef);
- const CodeGenInstruction &Instr = getInstrForIntrinsic(CGT, Intrin);
- Pat =
- std::make_unique<CodeGenInstructionPattern>(Instr, insertStrRef(Name));
- cast<CodeGenInstructionPattern>(*Pat).setIntrinsic(Intrin);
- } else if (const DagInit *PFP =
- getDagWithOperatorOfSubClass(Arg, PatFrag::ClassName)) {
- const Record *Def = PFP->getOperatorAsDef(RuleDef.getLoc());
- const PatFrag *PF = parsePatFrag(Def);
- if (!PF)
- return nullptr; // Already diagnosed by parsePatFrag
- Pat = std::make_unique<PatFragPattern>(*PF, insertStrRef(Name));
- } else if (const DagInit *BP =
- getDagWithOperatorOfSubClass(Arg, BuiltinPattern::ClassName)) {
- Pat = std::make_unique<BuiltinPattern>(
- *BP->getOperatorAsDef(RuleDef.getLoc()), insertStrRef(Name));
- } else
- return nullptr;
-
- for (unsigned K = 0; K < DagPat->getNumArgs(); ++K) {
- Init *Arg = DagPat->getArg(K);
- if (auto *DagArg = getDagWithSpecificOperator(*Arg, "MIFlags")) {
- if (!parseInstructionPatternMIFlags(*Pat, DagArg))
- return nullptr;
- continue;
- }
-
- if (!parseInstructionPatternOperand(*Pat, Arg, DagPat->getArgName(K)))
- return nullptr;
- }
-
- if (!Pat->checkSemantics(RuleDef.getLoc()))
- return nullptr;
-
- return std::move(Pat);
-}
-
-std::unique_ptr<Pattern>
-CombineRuleBuilder::parseWipMatchOpcodeMatcher(const Init &Arg,
- StringRef Name) const {
- const DagInit *Matcher = getDagWithSpecificOperator(Arg, "wip_match_opcode");
- if (!Matcher)
- return nullptr;
-
- if (Matcher->getNumArgs() == 0) {
- PrintError("Empty wip_match_opcode");
- return nullptr;
- }
-
- // Each argument is an opcode that can match.
- auto Result = std::make_unique<AnyOpcodePattern>(insertStrRef(Name));
- for (const auto &Arg : Matcher->getArgs()) {
- Record *OpcodeDef = getDefOfSubClass(*Arg, "Instruction");
- if (OpcodeDef) {
- Result->addOpcode(&CGT.getInstruction(OpcodeDef));
- continue;
- }
-
- PrintError("Arguments to wip_match_opcode must be instructions");
- return nullptr;
- }
-
- return std::move(Result);
-}
-
-bool CombineRuleBuilder::parseInstructionPatternOperand(
- InstructionPattern &IP, const Init *OpInit,
- const StringInit *OpName) const {
- const auto ParseErr = [&]() {
- PrintError("cannot parse operand '" + OpInit->getAsUnquotedString() + "' ");
- if (OpName)
- PrintNote("operand name is '" + OpName->getAsUnquotedString() + "'");
- return false;
- };
-
- // untyped immediate, e.g. 0
- if (const auto *IntImm = dyn_cast<IntInit>(OpInit)) {
- std::string Name = OpName ? OpName->getAsUnquotedString() : "";
- IP.addOperand(IntImm->getValue(), insertStrRef(Name), PatternType());
- return true;
- }
-
- // typed immediate, e.g. (i32 0)
- if (const auto *DagOp = dyn_cast<DagInit>(OpInit)) {
- if (DagOp->getNumArgs() != 1)
- return ParseErr();
-
- const Record *TyDef = DagOp->getOperatorAsDef(RuleDef.getLoc());
- auto ImmTy = PatternType::get(RuleDef.getLoc(), TyDef,
- "cannot parse immediate '" +
- DagOp->getAsUnquotedString() + "'");
- if (!ImmTy)
- return false;
-
- if (!IP.hasAllDefs()) {
- PrintError("out operand of '" + IP.getInstName() +
- "' cannot be an immediate");
- return false;
- }
-
- const auto *Val = dyn_cast<IntInit>(DagOp->getArg(0));
- if (!Val)
- return ParseErr();
-
- std::string Name = OpName ? OpName->getAsUnquotedString() : "";
- IP.addOperand(Val->getValue(), insertStrRef(Name), *ImmTy);
- return true;
- }
-
- // Typed operand e.g. $x/$z in (G_FNEG $x, $z)
- if (auto *DefI = dyn_cast<DefInit>(OpInit)) {
- if (!OpName) {
- PrintError("expected an operand name after '" + OpInit->getAsString() +
- "'");
- return false;
- }
- const Record *Def = DefI->getDef();
- auto Ty =
- PatternType::get(RuleDef.getLoc(), Def, "cannot parse operand type");
- if (!Ty)
- return false;
- IP.addOperand(insertStrRef(OpName->getAsUnquotedString()), *Ty);
- return true;
- }
-
- // Untyped operand e.g. $x/$z in (G_FNEG $x, $z)
- if (isa<UnsetInit>(OpInit)) {
- assert(OpName && "Unset w/ no OpName?");
- IP.addOperand(insertStrRef(OpName->getAsUnquotedString()), PatternType());
- return true;
- }
-
- return ParseErr();
-}
-
-bool CombineRuleBuilder::parseInstructionPatternMIFlags(
- InstructionPattern &IP, const DagInit *Op) const {
- auto *CGIP = dyn_cast<CodeGenInstructionPattern>(&IP);
- if (!CGIP) {
- PrintError("matching/writing MIFlags is only allowed on CodeGenInstruction "
- "patterns");
- return false;
- }
-
- const auto CheckFlagEnum = [&](const Record *R) {
- if (!R->isSubClassOf(MIFlagsEnumClassName)) {
- PrintError("'" + R->getName() + "' is not a subclass of '" +
- MIFlagsEnumClassName + "'");
- return false;
- }
-
- return true;
- };
-
- if (CGIP->getMIFlagsInfo()) {
- PrintError("MIFlags can only be present once on an instruction");
- return false;
- }
-
- auto &FI = CGIP->getOrCreateMIFlagsInfo();
- for (unsigned K = 0; K < Op->getNumArgs(); ++K) {
- const Init *Arg = Op->getArg(K);
-
- // Match/set a flag: (MIFlags FmNoNans)
- if (const auto *Def = dyn_cast<DefInit>(Arg)) {
- const Record *R = Def->getDef();
- if (!CheckFlagEnum(R))
- return false;
-
- FI.addSetFlag(R);
- continue;
- }
-
- // Do not match a flag/unset a flag: (MIFlags (not FmNoNans))
- if (const DagInit *NotDag = getDagWithSpecificOperator(*Arg, "not")) {
- for (const Init *NotArg : NotDag->getArgs()) {
- const DefInit *DefArg = dyn_cast<DefInit>(NotArg);
- if (!DefArg) {
- PrintError("cannot parse '" + NotArg->getAsUnquotedString() +
- "': expected a '" + MIFlagsEnumClassName + "'");
- return false;
- }
-
- const Record *R = DefArg->getDef();
- if (!CheckFlagEnum(R))
- return false;
-
- FI.addUnsetFlag(R);
- continue;
- }
-
- continue;
- }
-
- // Copy flags from a matched instruction: (MIFlags $mi)
- if (isa<UnsetInit>(Arg)) {
- FI.addCopyFlag(insertStrRef(Op->getArgName(K)->getAsUnquotedString()));
- continue;
- }
- }
-
- return true;
-}
-
-std::unique_ptr<PatFrag>
-CombineRuleBuilder::parsePatFragImpl(const Record *Def) const {
- auto StackTrace = PrettyStackTraceParse(*Def);
- if (!Def->isSubClassOf(PatFrag::ClassName))
- return nullptr;
-
- const DagInit *Ins = Def->getValueAsDag("InOperands");
- if (Ins->getOperatorAsDef(Def->getLoc())->getName() != "ins") {
- ::PrintError(Def, "expected 'ins' operator for " + PatFrag::ClassName +
- " in operands list");
- return nullptr;
- }
-
- const DagInit *Outs = Def->getValueAsDag("OutOperands");
- if (Outs->getOperatorAsDef(Def->getLoc())->getName() != "outs") {
- ::PrintError(Def, "expected 'outs' operator for " + PatFrag::ClassName +
- " out operands list");
- return nullptr;
- }
-
- auto Result = std::make_unique<PatFrag>(*Def);
- if (!parsePatFragParamList(Def->getLoc(), *Outs,
- [&](StringRef Name, PatFrag::ParamKind Kind) {
- Result->addOutParam(insertStrRef(Name), Kind);
- return true;
- }))
- return nullptr;
-
- if (!parsePatFragParamList(Def->getLoc(), *Ins,
- [&](StringRef Name, PatFrag::ParamKind Kind) {
- Result->addInParam(insertStrRef(Name), Kind);
- return true;
- }))
- return nullptr;
-
- const ListInit *Alts = Def->getValueAsListInit("Alternatives");
- unsigned AltIdx = 0;
- for (const Init *Alt : *Alts) {
- const auto *PatDag = dyn_cast<DagInit>(Alt);
- if (!PatDag) {
- ::PrintError(Def, "expected dag init for PatFrag pattern alternative");
- return nullptr;
- }
-
- PatFrag::Alternative &A = Result->addAlternative();
- const auto AddPat = [&](std::unique_ptr<Pattern> Pat) {
- A.Pats.push_back(std::move(Pat));
- return true;
- };
-
- if (!parsePatternList(
- *PatDag, AddPat, "pattern", Def->getLoc(),
- /*AnonPatPrefix*/
- (Def->getName() + "_alt" + Twine(AltIdx++) + "_pattern").str()))
- return nullptr;
- }
-
- if (!Result->buildOperandsTables() || !Result->checkSemantics())
- return nullptr;
-
- return Result;
-}
-
-bool CombineRuleBuilder::parsePatFragParamList(
- ArrayRef<SMLoc> DiagLoc, const DagInit &OpsList,
- function_ref<bool(StringRef, PatFrag::ParamKind)> ParseAction) const {
- for (unsigned K = 0; K < OpsList.getNumArgs(); ++K) {
- const StringInit *Name = OpsList.getArgName(K);
- const Init *Ty = OpsList.getArg(K);
-
- if (!Name) {
- ::PrintError(DiagLoc, "all operands must be named'");
- return false;
- }
- const std::string NameStr = Name->getAsUnquotedString();
-
- PatFrag::ParamKind OpKind;
- if (isSpecificDef(*Ty, "gi_imm"))
- OpKind = PatFrag::PK_Imm;
- else if (isSpecificDef(*Ty, "root"))
- OpKind = PatFrag::PK_Root;
- else if (isa<UnsetInit>(Ty) ||
- isSpecificDef(*Ty, "gi_mo")) // no type = gi_mo.
- OpKind = PatFrag::PK_MachineOperand;
- else {
- ::PrintError(
- DiagLoc,
- "'" + NameStr +
- "' operand type was expected to be 'root', 'gi_imm' or 'gi_mo'");
- return false;
- }
-
- if (!ParseAction(NameStr, OpKind))
- return false;
- }
-
- return true;
-}
-
-const PatFrag *CombineRuleBuilder::parsePatFrag(const Record *Def) const {
- // Cache already parsed PatFrags to avoid doing extra work.
- static DenseMap<const Record *, std::unique_ptr<PatFrag>> ParsedPatFrags;
-
- auto It = ParsedPatFrags.find(Def);
- if (It != ParsedPatFrags.end()) {
- SeenPatFrags.insert(It->second.get());
- return It->second.get();
- }
-
- std::unique_ptr<PatFrag> NewPatFrag = parsePatFragImpl(Def);
- if (!NewPatFrag) {
- ::PrintError(Def, "Could not parse " + PatFrag::ClassName + " '" +
- Def->getName() + "'");
- // Put a nullptr in the map so we don't attempt parsing this again.
- ParsedPatFrags[Def] = nullptr;
- return nullptr;
- }
-
- const auto *Res = NewPatFrag.get();
- ParsedPatFrags[Def] = std::move(NewPatFrag);
- SeenPatFrags.insert(Res);
- return Res;
-}
-
bool CombineRuleBuilder::emitMatchPattern(CodeExpansions &CE,
const PatternAlternatives &Alts,
const InstructionPattern &IP) {
@@ -2956,8 +2505,8 @@ GICombinerEmitter::buildMatchTable(MutableArrayRef<RuleMatcher> Rules) {
const Matcher *B) {
auto *L = static_cast<const RuleMatcher *>(A);
auto *R = static_cast<const RuleMatcher *>(B);
- return std::tuple(OpcodeOrder[L->getOpcode()], L->getNumOperands()) <
- std::tuple(OpcodeOrder[R->getOpcode()], R->getNumOperands());
+ return std::make_tuple(OpcodeOrder[L->getOpcode()], L->getNumOperands()) <
+ std::make_tuple(OpcodeOrder[R->getOpcode()], R->getNumOperands());
});
for (Matcher *Rule : InputRules)
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/MCTargetDesc/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/MCTargetDesc/BUILD.gn
index 12d875cf40c9..5ba91fcec83a 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/MCTargetDesc/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/MCTargetDesc/BUILD.gn
@@ -104,6 +104,7 @@ static_library("MCTargetDesc") {
"AMDGPUMCAsmInfo.cpp",
"AMDGPUMCCodeEmitter.cpp",
"AMDGPUMCExpr.cpp",
+ "AMDGPUMCKernelDescriptor.cpp",
"AMDGPUMCTargetDesc.cpp",
"AMDGPUTargetStreamer.cpp",
"R600InstPrinter.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/Common/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/Common/BUILD.gn
index c0ea62716fd2..daa3278d56d7 100644
--- a/llvm/utils/gn/secondary/llvm/utils/TableGen/Common/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/Common/BUILD.gn
@@ -18,9 +18,11 @@ static_library("Common") {
"DAGISelMatcher.cpp",
"GlobalISel/CXXPredicates.cpp",
"GlobalISel/CodeExpander.cpp",
+ "GlobalISel/CombinerUtils.cpp",
"GlobalISel/GlobalISelMatchTable.cpp",
"GlobalISel/GlobalISelMatchTableExecutorEmitter.cpp",
"GlobalISel/MatchDataInfo.cpp",
+ "GlobalISel/PatternParser.cpp",
"GlobalISel/Patterns.cpp",
"InfoByHwMode.cpp",
"OptEmitter.cpp",
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
index f4bac9376f2e..28526f1a1560 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -611,19 +611,19 @@ def LLVM_DbgLabelOp : LLVM_IntrOp<"dbg.label", [], [], [], 0> {
// Variadic function intrinsics.
//
-def LLVM_VaStartOp : LLVM_ZeroResultIntrOp<"vastart">,
+def LLVM_VaStartOp : LLVM_ZeroResultIntrOp<"vastart", [0]>,
Arguments<(ins LLVM_AnyPointer:$arg_list)> {
let assemblyFormat = "$arg_list attr-dict `:` qualified(type($arg_list))";
let summary = "Initializes `arg_list` for subsequent variadic argument extractions.";
}
-def LLVM_VaCopyOp : LLVM_ZeroResultIntrOp<"vacopy">,
+def LLVM_VaCopyOp : LLVM_ZeroResultIntrOp<"vacopy", [0]>,
Arguments<(ins LLVM_AnyPointer:$dest_list, LLVM_AnyPointer:$src_list)> {
let assemblyFormat = "$src_list `to` $dest_list attr-dict `:` type(operands)";
let summary = "Copies the current argument position from `src_list` to `dest_list`.";
}
-def LLVM_VaEndOp : LLVM_ZeroResultIntrOp<"vaend">,
+def LLVM_VaEndOp : LLVM_ZeroResultIntrOp<"vaend", [0]>,
Arguments<(ins LLVM_AnyPointer:$arg_list)> {
let assemblyFormat = "$arg_list attr-dict `:` qualified(type($arg_list))";
let summary = "Destroys `arg_list`, which has been initialized by `intr.vastart` or `intr.vacopy`.";
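The added [0] marks the pointer operand of these ops as overloaded, so they import from and export to the pointer-mangled intrinsics llvm.va_start.p0, llvm.va_copy.p0, and llvm.va_end.p0; the .ll and .mlir test updates below follow from that. A small C++ sketch of the LLVM-side naming this relies on (illustrative only, not part of the patch):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    // With operand 0 overloaded, the declaration is mangled on the pointer type.
    llvm::Function *getVaStartDecl(llvm::Module &M) {
      llvm::Type *PtrTy = llvm::PointerType::get(M.getContext(), /*AddressSpace=*/0);
      // The returned declaration is named "llvm.va_start.p0" for address space 0.
      return llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::vastart, {PtrTy});
    }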
diff --git a/mlir/include/mlir/IR/Dialect.h b/mlir/include/mlir/IR/Dialect.h
index 6c8a170a03c7..f7c1f4df16fc 100644
--- a/mlir/include/mlir/IR/Dialect.h
+++ b/mlir/include/mlir/IR/Dialect.h
@@ -210,7 +210,7 @@ public:
/// registration. The promised interface type can be an interface of any type
/// not just a dialect interface, i.e. it may also be an
/// AttributeInterface/OpInterface/TypeInterface/etc.
- template <typename ConcreteT, typename InterfaceT>
+ template <typename InterfaceT, typename ConcreteT>
void declarePromisedInterface() {
unresolvedPromisedInterfaces.insert(
{TypeID::get<ConcreteT>(), InterfaceT::getInterfaceID()});
@@ -221,7 +221,7 @@ public:
// declarePromisedInterfaces<FunctionOpInterface, MyFuncType1, MyFuncType2>()
template <typename InterfaceT, typename... ConcreteT>
void declarePromisedInterfaces() {
- (declarePromisedInterface<ConcreteT, InterfaceT>(), ...);
+ (declarePromisedInterface<InterfaceT, ConcreteT>(), ...);
}
/// Checks if the given interface, which is attempting to be used, is a
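With the interface type now leading the parameter list, single promises read the same way as the variadic declarePromisedInterfaces, and the fold expression above can expand the concrete types directly. The dialect initializers updated below all follow this shape; a brief sketch with hypothetical MyDialect, MyOpA, and MyOpB names:

    // Hypothetical dialect, shown only to illustrate the new argument order.
    void MyDialect::initialize() {
      // Before this patch: declarePromisedInterface<MyDialect, ConvertToLLVMPatternInterface>();
      declarePromisedInterface<ConvertToLLVMPatternInterface, MyDialect>();
      // Variadic form: the interface is folded over each listed op.
      declarePromisedInterfaces<bufferization::BufferizableOpInterface, MyOpA, MyOpB>();
    }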
diff --git a/mlir/lib/Dialect/Arith/IR/ArithDialect.cpp b/mlir/lib/Dialect/Arith/IR/ArithDialect.cpp
index 6a593185cced..042acf610090 100644
--- a/mlir/lib/Dialect/Arith/IR/ArithDialect.cpp
+++ b/mlir/lib/Dialect/Arith/IR/ArithDialect.cpp
@@ -48,9 +48,9 @@ void arith::ArithDialect::initialize() {
#include "mlir/Dialect/Arith/IR/ArithOpsAttributes.cpp.inc"
>();
addInterfaces<ArithInlinerInterface>();
- declarePromisedInterface<ArithDialect, ConvertToLLVMPatternInterface>();
- declarePromisedInterface<SelectOp,
- bufferization::BufferDeallocationOpInterface>();
+ declarePromisedInterface<ConvertToLLVMPatternInterface, ArithDialect>();
+ declarePromisedInterface<bufferization::BufferDeallocationOpInterface,
+ SelectOp>();
declarePromisedInterfaces<bufferization::BufferizableOpInterface, ConstantOp,
IndexCastOp, SelectOp>();
declarePromisedInterfaces<ValueBoundsOpInterface, AddIOp, ConstantOp, SubIOp,
diff --git a/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp b/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp
index ca57171af156..0bdcf434e062 100644
--- a/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp
+++ b/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp
@@ -40,7 +40,7 @@ void complex::ComplexDialect::initialize() {
#define GET_ATTRDEF_LIST
#include "mlir/Dialect/Complex/IR/ComplexAttributes.cpp.inc"
>();
- declarePromisedInterface<ComplexDialect, ConvertToLLVMPatternInterface>();
+ declarePromisedInterface<ConvertToLLVMPatternInterface, ComplexDialect>();
addInterfaces<ComplexInlinerInterface>();
}
diff --git a/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp
index c6b02b9703e7..5d11f8f6cc45 100644
--- a/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp
+++ b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp
@@ -70,11 +70,11 @@ void ControlFlowDialect::initialize() {
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.cpp.inc"
>();
addInterfaces<ControlFlowInlinerInterface>();
- declarePromisedInterface<ControlFlowDialect, ConvertToLLVMPatternInterface>();
+ declarePromisedInterface<ConvertToLLVMPatternInterface, ControlFlowDialect>();
declarePromisedInterfaces<bufferization::BufferizableOpInterface, BranchOp,
CondBranchOp>();
- declarePromisedInterface<CondBranchOp,
- bufferization::BufferDeallocationOpInterface>();
+ declarePromisedInterface<bufferization::BufferDeallocationOpInterface,
+ CondBranchOp>();
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Func/IR/FuncOps.cpp b/mlir/lib/Dialect/Func/IR/FuncOps.cpp
index ed2ecfe9d0fb..95589e8989e2 100644
--- a/mlir/lib/Dialect/Func/IR/FuncOps.cpp
+++ b/mlir/lib/Dialect/Func/IR/FuncOps.cpp
@@ -42,8 +42,8 @@ void FuncDialect::initialize() {
#define GET_OP_LIST
#include "mlir/Dialect/Func/IR/FuncOps.cpp.inc"
>();
- declarePromisedInterface<FuncDialect, DialectInlinerInterface>();
- declarePromisedInterface<FuncDialect, ConvertToLLVMPatternInterface>();
+ declarePromisedInterface<DialectInlinerInterface, FuncDialect>();
+ declarePromisedInterface<ConvertToLLVMPatternInterface, FuncDialect>();
declarePromisedInterfaces<bufferization::BufferizableOpInterface, CallOp,
FuncOp, ReturnOp>();
}
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index a02eca8b1179..f1b9ca5c5002 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -216,8 +216,8 @@ void GPUDialect::initialize() {
#include "mlir/Dialect/GPU/IR/GPUOpsAttributes.cpp.inc"
>();
addInterfaces<GPUInlinerInterface>();
- declarePromisedInterface<TerminatorOp,
- bufferization::BufferDeallocationOpInterface>();
+ declarePromisedInterface<bufferization::BufferDeallocationOpInterface,
+ TerminatorOp>();
}
static std::string getSparseHandleKeyword(SparseHandleKind kind) {
diff --git a/mlir/lib/Dialect/Index/IR/IndexDialect.cpp b/mlir/lib/Dialect/Index/IR/IndexDialect.cpp
index d631afa63b9a..183d0e33b252 100644
--- a/mlir/lib/Dialect/Index/IR/IndexDialect.cpp
+++ b/mlir/lib/Dialect/Index/IR/IndexDialect.cpp
@@ -19,7 +19,7 @@ using namespace mlir::index;
void IndexDialect::initialize() {
registerAttributes();
registerOperations();
- declarePromisedInterface<IndexDialect, ConvertToLLVMPatternInterface>();
+ declarePromisedInterface<ConvertToLLVMPatternInterface, IndexDialect>();
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 9e8407451a08..94197e473ce0 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1044,8 +1044,8 @@ void NVVMDialect::initialize() {
// Support unknown operations because not all NVVM operations are
// registered.
allowUnknownOperations();
- declarePromisedInterface<NVVMDialect, ConvertToLLVMPatternInterface>();
- declarePromisedInterface<NVVMTargetAttr, gpu::TargetAttrInterface>();
+ declarePromisedInterface<ConvertToLLVMPatternInterface, NVVMDialect>();
+ declarePromisedInterface<gpu::TargetAttrInterface, NVVMTargetAttr>();
}
LogicalResult NVVMDialect::verifyOperationAttribute(Operation *op,
diff --git a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
index 0f2e75cd7e8b..65b770ae3261 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
@@ -247,7 +247,7 @@ void ROCDLDialect::initialize() {
// Support unknown operations because not all ROCDL operations are registered.
allowUnknownOperations();
- declarePromisedInterface<ROCDLTargetAttr, gpu::TargetAttrInterface>();
+ declarePromisedInterface<gpu::TargetAttrInterface, ROCDLTargetAttr>();
}
LogicalResult ROCDLDialect::verifyOperationAttribute(Operation *op,
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
index a6936fde4370..9e50c355c504 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
@@ -123,16 +123,16 @@ void mlir::linalg::LinalgDialect::initialize() {
addInterfaces<LinalgInlinerInterface>();
- declarePromisedInterface<GenericOp, mesh::ShardingInterface>();
+ declarePromisedInterface<mesh::ShardingInterface, GenericOp>();
declarePromisedInterfaces<mesh::ShardingInterface,
#define GET_OP_LIST
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
>();
- declarePromisedInterface<CopyOp, SubsetOpInterface>();
- declarePromisedInterface<CopyOp, SubsetInsertionOpInterface>();
- declarePromisedInterface<IndexOp, ValueBoundsOpInterface>();
- declarePromisedInterface<linalg::GenericOp, TilingInterface>();
- declarePromisedInterface<linalg::GenericOp, PartialReductionOpInterface>();
+ declarePromisedInterface<SubsetOpInterface, CopyOp>();
+ declarePromisedInterface<SubsetInsertionOpInterface, CopyOp>();
+ declarePromisedInterface<ValueBoundsOpInterface, IndexOp>();
+ declarePromisedInterface<TilingInterface, linalg::GenericOp>();
+ declarePromisedInterface<PartialReductionOpInterface, linalg::GenericOp>();
declarePromisedInterfaces<TilingInterface,
#define GET_OP_LIST
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
diff --git a/mlir/lib/Dialect/Math/IR/MathDialect.cpp b/mlir/lib/Dialect/Math/IR/MathDialect.cpp
index a71b24cb1b97..285b5ca59405 100644
--- a/mlir/lib/Dialect/Math/IR/MathDialect.cpp
+++ b/mlir/lib/Dialect/Math/IR/MathDialect.cpp
@@ -35,5 +35,5 @@ void mlir::math::MathDialect::initialize() {
#include "mlir/Dialect/Math/IR/MathOps.cpp.inc"
>();
addInterfaces<MathInlinerInterface>();
- declarePromisedInterface<MathDialect, ConvertToLLVMPatternInterface>();
+ declarePromisedInterface<ConvertToLLVMPatternInterface, MathDialect>();
}
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp
index 41082a85a485..3a8bd12ba258 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp
@@ -47,14 +47,14 @@ void mlir::memref::MemRefDialect::initialize() {
#include "mlir/Dialect/MemRef/IR/MemRefOps.cpp.inc"
>();
addInterfaces<MemRefInlinerInterface>();
- declarePromisedInterface<MemRefDialect, ConvertToLLVMPatternInterface>();
+ declarePromisedInterface<ConvertToLLVMPatternInterface, MemRefDialect>();
declarePromisedInterfaces<bufferization::AllocationOpInterface, AllocOp,
AllocaOp, ReallocOp>();
declarePromisedInterfaces<RuntimeVerifiableOpInterface, CastOp, ExpandShapeOp,
LoadOp, ReinterpretCastOp, StoreOp, SubViewOp>();
declarePromisedInterfaces<ValueBoundsOpInterface, AllocOp, AllocaOp, CastOp,
DimOp, GetGlobalOp, RankOp, SubViewOp>();
- declarePromisedInterface<MemRefType, DestructurableTypeInterface>();
+ declarePromisedInterface<DestructurableTypeInterface, MemRefType>();
}
/// Finds the unique dealloc operation (if one exists) for `allocValue`.
diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp
index ddb9676eb4f6..5bca8e85f889 100644
--- a/mlir/lib/Dialect/SCF/IR/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp
@@ -79,7 +79,7 @@ void SCFDialect::initialize() {
declarePromisedInterfaces<bufferization::BufferizableOpInterface, ConditionOp,
ExecuteRegionOp, ForOp, IfOp, IndexSwitchOp,
ForallOp, InParallelOp, WhileOp, YieldOp>();
- declarePromisedInterface<ForOp, ValueBoundsOpInterface>();
+ declarePromisedInterface<ValueBoundsOpInterface, ForOp>();
}
/// Default callback for IfOp builders. Inserts a yield without arguments.
diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp
index e914f46bdef6..72488d6e5d0b 100644
--- a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp
@@ -135,7 +135,7 @@ void SPIRVDialect::initialize() {
// Allow unknown operations because SPIR-V is extensible.
allowUnknownOperations();
- declarePromisedInterface<TargetEnvAttr, gpu::TargetAttrInterface>();
+ declarePromisedInterface<gpu::TargetAttrInterface, TargetEnvAttr>();
}
std::string SPIRVDialect::getAttributeName(Decoration decoration) {
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp
index 4b3156728cc9..002077753b13 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp
@@ -62,7 +62,7 @@ void TensorDialect::initialize() {
ParallelInsertSliceOp>();
declarePromisedInterfaces<SubsetInsertionOpInterface, InsertSliceOp,
ParallelInsertSliceOp>();
- declarePromisedInterface<ExtractSliceOp, SubsetExtractionOpInterface>();
+ declarePromisedInterface<SubsetExtractionOpInterface, ExtractSliceOp>();
declarePromisedInterfaces<TilingInterface, PadOp, PackOp, UnPackOp>();
declarePromisedInterfaces<ValueBoundsOpInterface, CastOp, DimOp, EmptyOp,
ExtractSliceOp, PadOp, RankOp>();
diff --git a/mlir/lib/Dialect/UB/IR/UBOps.cpp b/mlir/lib/Dialect/UB/IR/UBOps.cpp
index 3a2010cdcb5c..5b2cfe7bf426 100644
--- a/mlir/lib/Dialect/UB/IR/UBOps.cpp
+++ b/mlir/lib/Dialect/UB/IR/UBOps.cpp
@@ -46,7 +46,7 @@ void UBDialect::initialize() {
#include "mlir/Dialect/UB/IR/UBOpsAttributes.cpp.inc"
>();
addInterfaces<UBInlinerInterface>();
- declarePromisedInterface<UBDialect, ConvertToLLVMPatternInterface>();
+ declarePromisedInterface<ConvertToLLVMPatternInterface, UBDialect>();
}
Operation *UBDialect::materializeConstant(OpBuilder &builder, Attribute value,
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index 35296824246e..e566bfacf379 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -382,8 +382,8 @@ void VectorDialect::initialize() {
YieldOp>();
declarePromisedInterfaces<SubsetOpInterface, TransferReadOp,
TransferWriteOp>();
- declarePromisedInterface<TransferReadOp, SubsetExtractionOpInterface>();
- declarePromisedInterface<TransferWriteOp, SubsetInsertionOpInterface>();
+ declarePromisedInterface<SubsetExtractionOpInterface, TransferReadOp>();
+ declarePromisedInterface<SubsetInsertionOpInterface, TransferWriteOp>();
}
/// Materialize a single constant operation from a given attribute value with
diff --git a/mlir/test/Target/LLVMIR/Import/basic.ll b/mlir/test/Target/LLVMIR/Import/basic.ll
index a059425d9780..448b0ebe2574 100644
--- a/mlir/test/Target/LLVMIR/Import/basic.ll
+++ b/mlir/test/Target/LLVMIR/Import/basic.ll
@@ -72,26 +72,26 @@ define i32 @useFreezeOp(i32 %x) {
; Varadic function definition
%struct.va_list = type { ptr }
-declare void @llvm.va_start(ptr)
-declare void @llvm.va_copy(ptr, ptr)
-declare void @llvm.va_end(ptr)
+declare void @llvm.va_start.p0(ptr)
+declare void @llvm.va_copy.p0(ptr, ptr)
+declare void @llvm.va_end.p0(ptr)
; CHECK-LABEL: llvm.func @variadic_function
define void @variadic_function(i32 %X, ...) {
; CHECK: %[[ALLOCA0:.+]] = llvm.alloca %{{.*}} x !llvm.struct<"struct.va_list", (ptr)> {{.*}} : (i32) -> !llvm.ptr
%ap = alloca %struct.va_list
; CHECK: llvm.intr.vastart %[[ALLOCA0]]
- call void @llvm.va_start(ptr %ap)
+ call void @llvm.va_start.p0(ptr %ap)
; CHECK: %[[ALLOCA1:.+]] = llvm.alloca %{{.*}} x !llvm.ptr {{.*}} : (i32) -> !llvm.ptr
%aq = alloca ptr
; CHECK: llvm.intr.vacopy %[[ALLOCA0]] to %[[ALLOCA1]]
- call void @llvm.va_copy(ptr %aq, ptr %ap)
+ call void @llvm.va_copy.p0(ptr %aq, ptr %ap)
; CHECK: llvm.intr.vaend %[[ALLOCA1]]
- call void @llvm.va_end(ptr %aq)
+ call void @llvm.va_end.p0(ptr %aq)
; CHECK: llvm.intr.vaend %[[ALLOCA0]]
- call void @llvm.va_end(ptr %ap)
+ call void @llvm.va_end.p0(ptr %ap)
; CHECK: llvm.return
ret void
}
diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
index 85561839f31a..0cefb4f8983a 100644
--- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll
+++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
@@ -599,11 +599,11 @@ define void @ushl_sat_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) {
; CHECK-LABEL: llvm.func @va_intrinsics_test
define void @va_intrinsics_test(ptr %0, ptr %1) {
; CHECK: llvm.intr.vastart %{{.*}}
- call void @llvm.va_start(ptr %0)
+ call void @llvm.va_start.p0(ptr %0)
; CHECK: llvm.intr.vacopy %{{.*}} to %{{.*}}
- call void @llvm.va_copy(ptr %1, ptr %0)
+ call void @llvm.va_copy.p0(ptr %1, ptr %0)
; CHECK: llvm.intr.vaend %{{.*}}
- call void @llvm.va_end(ptr %0)
+ call void @llvm.va_end.p0(ptr %0)
ret void
}
@@ -1076,9 +1076,9 @@ declare ptr @llvm.stacksave.p0()
declare ptr addrspace(1) @llvm.stacksave.p1()
declare void @llvm.stackrestore.p0(ptr)
declare void @llvm.stackrestore.p1(ptr addrspace(1))
-declare void @llvm.va_start(ptr)
-declare void @llvm.va_copy(ptr, ptr)
-declare void @llvm.va_end(ptr)
+declare void @llvm.va_start.p0(ptr)
+declare void @llvm.va_copy.p0(ptr, ptr)
+declare void @llvm.va_end.p0(ptr)
declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir
index c38c7ea587d2..97f37939551d 100644
--- a/mlir/test/Target/LLVMIR/llvmir.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir.mlir
@@ -2251,14 +2251,14 @@ llvm.func @vararg_function(%arg0: i32, ...) {
%1 = llvm.mlir.constant(1 : i32) : i32
// CHECK: %[[ALLOCA0:.+]] = alloca %struct.va_list, align 8
%2 = llvm.alloca %1 x !llvm.struct<"struct.va_list", (ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
- // CHECK: call void @llvm.va_start(ptr %[[ALLOCA0]])
+ // CHECK: call void @llvm.va_start.p0(ptr %[[ALLOCA0]])
llvm.intr.vastart %2 : !llvm.ptr
// CHECK: %[[ALLOCA1:.+]] = alloca ptr, align 8
%4 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr
- // CHECK: call void @llvm.va_copy(ptr %[[ALLOCA1]], ptr %[[ALLOCA0]])
+ // CHECK: call void @llvm.va_copy.p0(ptr %[[ALLOCA1]], ptr %[[ALLOCA0]])
llvm.intr.vacopy %2 to %4 : !llvm.ptr, !llvm.ptr
- // CHECK: call void @llvm.va_end(ptr %[[ALLOCA1]])
- // CHECK: call void @llvm.va_end(ptr %[[ALLOCA0]])
+ // CHECK: call void @llvm.va_end.p0(ptr %[[ALLOCA1]])
+ // CHECK: call void @llvm.va_end.p0(ptr %[[ALLOCA0]])
llvm.intr.vaend %4 : !llvm.ptr
llvm.intr.vaend %2 : !llvm.ptr
// CHECK: ret void
diff --git a/mlir/unittests/IR/InterfaceAttachmentTest.cpp b/mlir/unittests/IR/InterfaceAttachmentTest.cpp
index 16de34c45ec6..58049a9969e3 100644
--- a/mlir/unittests/IR/InterfaceAttachmentTest.cpp
+++ b/mlir/unittests/IR/InterfaceAttachmentTest.cpp
@@ -431,8 +431,8 @@ TEST(InterfaceAttachmentTest, PromisedInterfaces) {
attr.hasPromiseOrImplementsInterface<TestExternalAttrInterface>());
// Add a promise `TestExternalAttrInterface`.
- testDialect->declarePromisedInterface<test::SimpleAAttr,
- TestExternalAttrInterface>();
+ testDialect->declarePromisedInterface<TestExternalAttrInterface,
+ test::SimpleAAttr>();
EXPECT_TRUE(
attr.hasPromiseOrImplementsInterface<TestExternalAttrInterface>());
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index a60bdb968371..a242049286a7 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -6752,11 +6752,11 @@ void __kmp_register_library_startup(void) {
int fd1 = -1;
shm_name = __kmp_str_format("/%s", name);
int shm_preexist = 0;
- fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
+ fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600);
if ((fd1 == -1) && (errno == EEXIST)) {
// file didn't open because it already exists.
// try opening existing file
- fd1 = shm_open(shm_name, O_RDWR, 0666);
+ fd1 = shm_open(shm_name, O_RDWR, 0600);
if (fd1 == -1) { // file didn't open
KMP_WARNING(FunctionError, "Can't open SHM");
__kmp_shm_available = false;
@@ -6800,11 +6800,11 @@ void __kmp_register_library_startup(void) {
int fd1 = -1;
temp_reg_status_file_name = __kmp_str_format("/tmp/%s", name);
int tmp_preexist = 0;
- fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0666);
+ fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600);
if ((fd1 == -1) && (errno == EEXIST)) {
// file didn't open because it already exists.
// try opening existing file
- fd1 = open(temp_reg_status_file_name, O_RDWR, 0666);
+ fd1 = open(temp_reg_status_file_name, O_RDWR, 0600);
       if (fd1 == -1) { // file didn't open
KMP_WARNING(FunctionError, "Can't open TEMP");
__kmp_tmp_available = false;
@@ -6944,7 +6944,7 @@ void __kmp_unregister_library(void) {
int fd1;
if (__kmp_shm_available) {
shm_name = __kmp_str_format("/%s", name);
- fd1 = shm_open(shm_name, O_RDONLY, 0666);
+ fd1 = shm_open(shm_name, O_RDONLY, 0600);
if (fd1 != -1) { // File opened successfully
char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
if (data1 != MAP_FAILED) {
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index e0790e0ab59e..eb0afbb6dd6f 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -216,7 +216,6 @@ libc_support_library(
":__support_cpp_limits",
":__support_cpp_type_traits",
":__support_macros_attributes",
- ":__support_macros_config",
":__support_macros_sanitizer",
],
)
@@ -383,7 +382,6 @@ libc_support_library(
],
deps = [
":__support_macros_attributes",
- ":__support_macros_config",
":__support_macros_properties_types",
":llvm_libc_macros_stdfix_macros",
],
@@ -663,7 +661,6 @@ libc_support_library(
":__support_cpp_limits",
":__support_cpp_type_traits",
":__support_macros_attributes",
- ":__support_macros_config",
],
)
@@ -2318,7 +2315,6 @@ libc_support_library(
":__support_cpp_cstddef",
":__support_cpp_type_traits",
":__support_macros_attributes",
- ":__support_macros_config",
":__support_macros_optimization",
":__support_macros_properties_architectures",
":__support_macros_properties_cpu_features",
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index 0e658353c36f..3c3e17bfec66 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -292,6 +292,10 @@ cc_library(
"-ldl",
"-lm",
],
+ "@platforms//os:macos": [
+ "-pthread",
+ "-ldl",
+ ],
"//conditions:default": [
"-pthread",
"-ldl",