summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorwanglei <wanglei@loongson.cn>2024-06-07 09:07:47 +0800
committerwanglei <wanglei@loongson.cn>2024-06-07 09:07:47 +0800
commit8bef7cca63d3bbb8063eb39e58cc163a9af010de (patch)
tree13aa9177d7049736abafb823f1c62168a3edad8d
parentd508acdda28dbf524e1159d6bbc67865c5ad0345 (diff)
parent7e7c29ba087e38056b91f1d783db0883dcc33ef7 (diff)
Created using spr 1.3.5-bogner
-rw-r--r--.github/workflows/containers/github-action-ci/stage1.Dockerfile2
-rw-r--r--bolt/include/bolt/Core/GDBIndex.h61
-rw-r--r--bolt/lib/Core/CMakeLists.txt1
-rw-r--r--bolt/lib/Core/GDBIndex.cpp185
-rw-r--r--clang-tools-extra/clang-tidy/misc/CMakeLists.txt1
-rw-r--r--clang-tools-extra/clang-tidy/misc/HeaderIncludeCycleCheck.cpp6
-rw-r--r--clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp9
-rw-r--r--clang-tools-extra/docs/ReleaseNotes.rst5
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/misc/header-include-cycle.self.cpp3
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/container-size-empty.cpp6
-rw-r--r--clang/docs/LanguageExtensions.rst24
-rw-r--r--clang/docs/ReleaseNotes.rst3
-rw-r--r--clang/include/clang/AST/Type.h14
-rw-r--r--clang/include/clang/Basic/BuiltinsAMDGPU.def2
-rw-r--r--clang/include/clang/Basic/Cuda.h1
-rw-r--r--clang/include/clang/Basic/DiagnosticOptions.h2
-rw-r--r--clang/include/clang/Basic/DiagnosticSemaKinds.td6
-rw-r--r--clang/include/clang/Basic/TokenKinds.def2
-rw-r--r--clang/include/clang/Basic/riscv_vector.td3
-rw-r--r--clang/lib/AST/CMakeLists.txt1
-rw-r--r--clang/lib/AST/Interp/ByteCodeExprGen.cpp102
-rw-r--r--clang/lib/AST/Interp/Context.cpp15
-rw-r--r--clang/lib/AST/Interp/Context.h2
-rw-r--r--clang/lib/AST/Interp/Descriptor.cpp1
-rw-r--r--clang/lib/AST/Interp/Disasm.cpp3
-rw-r--r--clang/lib/AST/Interp/Function.cpp3
-rw-r--r--clang/lib/AST/Interp/Interp.cpp39
-rw-r--r--clang/lib/AST/Interp/Interp.h107
-rw-r--r--clang/lib/AST/Interp/InterpFrame.cpp1
-rw-r--r--clang/lib/AST/Interp/InterpStack.cpp1
-rw-r--r--clang/lib/AST/Interp/InterpStack.h3
-rw-r--r--clang/lib/AST/Interp/MemberPointer.cpp76
-rw-r--r--clang/lib/AST/Interp/MemberPointer.h112
-rw-r--r--clang/lib/AST/Interp/Opcodes.td18
-rw-r--r--clang/lib/AST/Interp/Pointer.cpp1
-rw-r--r--clang/lib/AST/Interp/Pointer.h1
-rw-r--r--clang/lib/AST/Interp/PrimType.cpp1
-rw-r--r--clang/lib/AST/Interp/PrimType.h8
-rw-r--r--clang/lib/AST/ParentMap.cpp16
-rw-r--r--clang/lib/AST/TextNodeDumper.cpp21
-rw-r--r--clang/lib/AST/Type.cpp38
-rw-r--r--clang/lib/Analysis/CFG.cpp50
-rw-r--r--clang/lib/Basic/Cuda.cpp1
-rw-r--r--clang/lib/Basic/Targets/LoongArch.h2
-rw-r--r--clang/lib/Basic/Targets/NVPTX.cpp1
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp1
-rw-r--r--clang/lib/CodeGen/Targets/AMDGPU.cpp6
-rw-r--r--clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp5
-rw-r--r--clang/lib/Format/ContinuationIndenter.cpp2
-rw-r--r--clang/lib/Interpreter/Interpreter.cpp165
-rw-r--r--clang/lib/Parse/ParseStmt.cpp5
-rw-r--r--clang/lib/Sema/Scope.cpp4
-rw-r--r--clang/lib/Sema/SemaAMDGPU.cpp6
-rw-r--r--clang/lib/Sema/SemaExpr.cpp35
-rw-r--r--clang/lib/Sema/SemaExprCXX.cpp6
-rw-r--r--clang/lib/Sema/SemaInit.cpp19
-rw-r--r--clang/lib/Sema/TreeTransform.h12
-rw-r--r--clang/lib/StaticAnalyzer/Core/ExprEngine.cpp56
-rw-r--r--clang/test/AST/Interp/arrays.cpp14
-rw-r--r--clang/test/AST/Interp/cxx23.cpp22
-rw-r--r--clang/test/AST/Interp/eval-order.cpp4
-rw-r--r--clang/test/AST/Interp/literals.cpp9
-rw-r--r--clang/test/AST/Interp/memberpointers.cpp197
-rw-r--r--clang/test/AST/ast-dump-default-init-json.cpp6
-rw-r--r--clang/test/AST/ast-dump-default-init.cpp2
-rw-r--r--clang/test/Analysis/cxx-uninitialized-object.cpp12
-rw-r--r--clang/test/Analysis/lifetime-extended-regions.cpp10
-rw-r--r--clang/test/CXX/drs/cwg16xx.cpp2
-rw-r--r--clang/test/CXX/drs/cwg18xx.cpp19
-rw-r--r--clang/test/CXX/special/class.temporary/p6.cpp34
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcpopv.c264
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vcpopv.c264
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vcpopv.c528
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vcpopv.c528
-rw-r--r--clang/test/CodeGen/voidptr-vaarg.c478
-rw-r--r--clang/test/CodeGenCUDA/cuda-builtin-vars.cu24
-rw-r--r--clang/test/CodeGenCXX/inline-then-fold-variadics.cpp181
-rw-r--r--clang/test/CodeGenCXX/pointers-to-data-members.cpp1
-rw-r--r--clang/test/CodeGenCXX/template-param-objects-linkage.cpp1
-rw-r--r--clang/test/CodeGenOpenCL/amdgpu-features.cl2
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl1
-rw-r--r--clang/test/Driver/aarch64-oryon-1.c19
-rw-r--r--clang/test/Driver/amdgpu-macros.cl1
-rw-r--r--clang/test/Driver/amdgpu-mcpu.cl2
-rw-r--r--clang/test/Interpreter/pretty-print.c11
-rw-r--r--clang/test/Misc/target-invalid-cpu-note.c8
-rw-r--r--clang/test/SemaCXX/attr-weak.cpp1
-rw-r--r--clang/test/SemaCXX/builtin-is-bitwise-cloneable-fsanitize.cpp34
-rw-r--r--clang/test/SemaCXX/builtin-is-bitwise-cloneable.cpp8
-rw-r--r--clang/test/SemaCXX/constexpr-default-arg.cpp4
-rw-r--r--clang/test/SemaCXX/cxx11-default-member-initializers.cpp74
-rw-r--r--clang/test/SemaCXX/eval-crashes.cpp6
-rw-r--r--clang/test/SemaCXX/nullptr_in_arithmetic_ops.cpp1
-rw-r--r--clang/test/SemaObjCXX/arc-type-traits.mm9
-rw-r--r--clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl6
-rw-r--r--clang/unittests/AST/Interp/toAPValue.cpp46
-rw-r--r--clang/unittests/Format/FormatTest.cpp8
-rwxr-xr-xclang/www/cxx_dr_status.html2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_bitvector.h8
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp16
-rw-r--r--compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp9
-rw-r--r--compiler-rt/test/dfsan/sscanf.c19
-rw-r--r--flang/cmake/modules/AddFlangOffloadRuntime.cmake1
-rw-r--r--flang/docs/Intrinsics.md31
-rw-r--r--flang/include/flang/Optimizer/Builder/IntrinsicCall.h2
-rw-r--r--flang/include/flang/Optimizer/Builder/Runtime/Command.h5
-rw-r--r--flang/include/flang/Optimizer/Dialect/FIRAttr.td30
-rw-r--r--flang/include/flang/Optimizer/Dialect/FIROps.td35
-rw-r--r--flang/include/flang/Optimizer/Transforms/Passes.h3
-rw-r--r--flang/include/flang/Optimizer/Transforms/Passes.td1
-rw-r--r--flang/include/flang/Runtime/command.h4
-rw-r--r--flang/include/flang/Runtime/magic-numbers.h5
-rw-r--r--flang/include/flang/Tools/CLOptions.inc5
-rw-r--r--flang/lib/Evaluate/intrinsics.cpp12
-rw-r--r--flang/lib/Lower/OpenMP/Clauses.h50
-rw-r--r--flang/lib/Lower/OpenMP/DataSharingProcessor.h4
-rw-r--r--flang/lib/Lower/OpenMP/Utils.cpp2
-rw-r--r--flang/lib/Optimizer/Builder/IntrinsicCall.cpp35
-rw-r--r--flang/lib/Optimizer/Builder/Runtime/Command.cpp13
-rw-r--r--flang/lib/Optimizer/Dialect/FIRAttr.cpp4
-rw-r--r--flang/lib/Optimizer/Dialect/FIROps.cpp73
-rw-r--r--flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp23
-rw-r--r--flang/runtime/command.cpp26
-rw-r--r--flang/runtime/stat.h1
-rw-r--r--flang/test/Fir/loop03.fir17
-rw-r--r--flang/test/Lower/Intrinsics/getcwd-function.f9023
-rw-r--r--flang/test/Lower/Intrinsics/getcwd-optional.f9029
-rw-r--r--flang/test/Lower/Intrinsics/getcwd.f9044
-rw-r--r--flang/test/Lower/OpenMP/critical.f9024
-rw-r--r--flang/test/Lower/OpenMP/map-component-ref.f9041
-rw-r--r--flang/test/Semantics/getcwd.f9035
-rw-r--r--libc/config/gpu/entrypoints.txt11
-rw-r--r--libc/config/linux/aarch64/entrypoints.txt15
-rw-r--r--libc/config/linux/x86_64/entrypoints.txt15
-rw-r--r--libc/docs/c23.rst18
-rw-r--r--libc/docs/math/index.rst24
-rw-r--r--libc/hdr/types/CMakeLists.txt9
-rw-r--r--libc/hdr/types/atexithandler_t.h22
-rw-r--r--libc/spec/stdc.td15
-rw-r--r--libc/src/__support/fixedvector.h18
-rw-r--r--libc/src/math/CMakeLists.txt12
-rw-r--r--libc/src/math/fmaximum_mag_numf16.h20
-rw-r--r--libc/src/math/fmaximum_magf16.h20
-rw-r--r--libc/src/math/fmaximum_numf16.h20
-rw-r--r--libc/src/math/fmaximumf16.h20
-rw-r--r--libc/src/math/fminimum_mag_numf16.h20
-rw-r--r--libc/src/math/fminimum_magf16.h20
-rw-r--r--libc/src/math/fminimum_numf16.h20
-rw-r--r--libc/src/math/fminimumf16.h20
-rw-r--r--libc/src/math/generic/CMakeLists.txt158
-rw-r--r--libc/src/math/generic/fmaximum_mag_numf16.cpp19
-rw-r--r--libc/src/math/generic/fmaximum_magf16.cpp19
-rw-r--r--libc/src/math/generic/fmaximum_numf16.cpp19
-rw-r--r--libc/src/math/generic/fmaximumf16.cpp19
-rw-r--r--libc/src/math/generic/fminimum_mag_numf16.cpp19
-rw-r--r--libc/src/math/generic/fminimum_magf16.cpp19
-rw-r--r--libc/src/math/generic/fminimum_numf16.cpp19
-rw-r--r--libc/src/math/generic/fminimumf16.cpp19
-rw-r--r--libc/src/math/generic/nextafterf16.cpp19
-rw-r--r--libc/src/math/generic/nextdownf16.cpp19
-rw-r--r--libc/src/math/generic/nexttowardf16.cpp21
-rw-r--r--libc/src/math/generic/nextupf16.cpp19
-rw-r--r--libc/src/math/nextafterf16.h20
-rw-r--r--libc/src/math/nextdownf16.h20
-rw-r--r--libc/src/math/nexttowardf16.h20
-rw-r--r--libc/src/math/nextupf16.h20
-rw-r--r--libc/src/stdlib/CMakeLists.txt36
-rw-r--r--libc/src/stdlib/at_quick_exit.cpp22
-rw-r--r--libc/src/stdlib/at_quick_exit.h20
-rw-r--r--libc/src/stdlib/atexit.cpp79
-rw-r--r--libc/src/stdlib/atexit.h7
-rw-r--r--libc/src/stdlib/exit_handler.cpp42
-rw-r--r--libc/src/stdlib/exit_handler.h53
-rw-r--r--libc/src/stdlib/quick_exit.cpp6
-rw-r--r--libc/test/src/__support/CMakeLists.txt5
-rw-r--r--libc/test/src/__support/fixedvector_test.cpp27
-rw-r--r--libc/test/src/math/smoke/CMakeLists.txt253
-rw-r--r--libc/test/src/math/smoke/FMaxTest.h10
-rw-r--r--libc/test/src/math/smoke/FMaximumMagNumTest.h15
-rw-r--r--libc/test/src/math/smoke/FMaximumMagTest.h15
-rw-r--r--libc/test/src/math/smoke/FMaximumNumTest.h15
-rw-r--r--libc/test/src/math/smoke/FMaximumTest.h15
-rw-r--r--libc/test/src/math/smoke/FMinTest.h10
-rw-r--r--libc/test/src/math/smoke/FMinimumMagNumTest.h15
-rw-r--r--libc/test/src/math/smoke/FMinimumMagTest.h15
-rw-r--r--libc/test/src/math/smoke/FMinimumNumTest.h15
-rw-r--r--libc/test/src/math/smoke/FMinimumTest.h15
-rw-r--r--libc/test/src/math/smoke/NextAfterTest.h10
-rw-r--r--libc/test/src/math/smoke/NextTowardTest.h11
-rw-r--r--libc/test/src/math/smoke/fmaximum_mag_numf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fmaximum_magf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fmaximum_numf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fmaximumf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fminimum_mag_numf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fminimum_magf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fminimum_numf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/fminimumf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/nextafterf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/nextdownf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/nexttowardf16_test.cpp13
-rw-r--r--libc/test/src/math/smoke/nextupf16_test.cpp13
-rw-r--r--libc/test/src/stdlib/CMakeLists.txt15
-rw-r--r--libc/test/src/stdlib/at_quick_exit_test.cpp90
-rw-r--r--libclc/CMakeLists.txt2
-rw-r--r--libcxx/docs/Hardening.rst373
-rw-r--r--libcxx/docs/ReleaseNotes/18.rst2
-rw-r--r--libcxx/docs/TestingLibcxx.rst45
-rw-r--r--libcxx/include/__configuration/abi.h3
-rw-r--r--libcxx/test/std/experimental/simd/simd.class/simd_copy.pass.cpp4
-rw-r--r--lld/ELF/Writer.cpp27
-rw-r--r--lld/test/ELF/linkerscript/memory-nonalloc-no-warn.test22
-rw-r--r--lld/test/ELF/linkerscript/sections-nonalloc.s33
-rw-r--r--lld/test/ELF/linkerscript/sections.s4
-rw-r--r--lld/test/ELF/relocatable-comdat.s77
-rwxr-xr-xlldb/examples/python/crashlog.py13
-rw-r--r--lldb/examples/python/crashlog_scripted_process.py5
-rw-r--r--lldb/source/Expression/DWARFExpression.cpp178
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h2
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp401
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h197
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp5
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp51
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h15
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h9
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp2
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h3
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp117
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h36
-rw-r--r--lldb/source/Target/Statistics.cpp10
-rw-r--r--lldb/source/Target/ThreadPlanStepOverRange.cpp2
-rw-r--r--lldb/test/API/functionalities/stats_api/TestStatisticsAPI.py7
-rw-r--r--lldb/test/API/tools/lldb-server/TestPtyServer.py1
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test4
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_invalid_target.test2
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_json.test2
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_legacy.test2
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/last_exception_backtrace_crashlog.test4
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/lit.local.cfg3
-rw-r--r--lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test2
-rw-r--r--lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test36
-rw-r--r--lldb/test/Shell/SymbolFile/DWARF/x86/simple-template-names-context.cpp44
-rw-r--r--llvm/docs/AMDGPUUsage.rst15
-rw-r--r--llvm/docs/ReleaseNotes.rst4
-rw-r--r--llvm/include/llvm/Analysis/CodeMetrics.h9
-rw-r--r--llvm/include/llvm/Analysis/LoopInfo.h3
-rw-r--r--llvm/include/llvm/AsmParser/LLToken.h1
-rw-r--r--llvm/include/llvm/BinaryFormat/ELF.h2
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h8
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h41
-rw-r--r--llvm/include/llvm/CodeGen/MachineFunction.h9
-rw-r--r--llvm/include/llvm/CodeGen/SDPatternMatch.h55
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAG.h3
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h2
-rw-r--r--llvm/include/llvm/IR/CallingConv.h3
-rw-r--r--llvm/include/llvm/IR/IRBuilderFolder.h1
-rw-r--r--llvm/include/llvm/IR/InstrTypes.h17
-rw-r--r--llvm/include/llvm/IR/IntrinsicInst.h19
-rw-r--r--llvm/include/llvm/InitializePasses.h1
-rw-r--r--llvm/include/llvm/MC/MCSymbolWasm.h11
-rw-r--r--llvm/include/llvm/ProfileData/MemProf.h140
-rw-r--r--llvm/include/llvm/Support/Endian.h3
-rw-r--r--llvm/include/llvm/Target/GlobalISel/Combine.td37
-rw-r--r--llvm/include/llvm/TargetParser/AArch64TargetParser.h5
-rw-r--r--llvm/include/llvm/TargetParser/TargetParser.h1
-rw-r--r--llvm/include/llvm/Transforms/IPO/ExpandVariadics.h40
-rw-r--r--llvm/include/llvm/Transforms/Utils/UnrollLoop.h7
-rw-r--r--llvm/lib/Analysis/CodeMetrics.cpp53
-rw-r--r--llvm/lib/Analysis/LoopInfo.cpp20
-rw-r--r--llvm/lib/Analysis/ValueTracking.cpp11
-rw-r--r--llvm/lib/AsmParser/LLLexer.cpp1
-rw-r--r--llvm/lib/AsmParser/LLParser.cpp4
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp86
-rw-r--r--llvm/lib/CodeGen/RegisterCoalescer.cpp5
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp16
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp7
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp7
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp9
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp10
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp2
-rw-r--r--llvm/lib/IR/AsmWriter.cpp3
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp4
-rw-r--r--llvm/lib/MC/WasmObjectWriter.cpp5
-rw-r--r--llvm/lib/Object/ELFObjectFile.cpp2
-rw-r--r--llvm/lib/ObjectYAML/ELFYAML.cpp1
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp1
-rw-r--r--llvm/lib/Passes/PassRegistry.def1
-rw-r--r--llvm/lib/ProfileData/InstrProf.cpp13
-rw-r--r--llvm/lib/ProfileData/MemProf.cpp162
-rw-r--r--llvm/lib/ProfileData/MemProfReader.cpp2
-rw-r--r--llvm/lib/Support/VirtualFileSystem.cpp61
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td5
-rw-r--r--llvm/lib/Target/AArch64/AArch64CallingConvention.td8
-rw-r--r--llvm/lib/Target/AArch64/AArch64Processors.td30
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp33
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedOryon.td1659
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.cpp7
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp19
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/GCNProcessors.td6
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp103
-rw-r--r--llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp103
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp14
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h3
-rw-r--r--llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp22
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp8
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTX.h7
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp32
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp6
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXUtilities.cpp57
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXUtilities.h16
-rw-r--r--llvm/lib/Target/NVPTX/NVVMIntrRange.cpp197
-rw-r--r--llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp5
-rw-r--r--llvm/lib/Target/PowerPC/PPCFastISel.cpp16
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp11
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstr64Bit.td6
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td6
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp32
-rw-r--r--llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp16
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td15
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td8
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td5
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp47
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVBuiltins.td1
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp149
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h16
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp2
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp6
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp12
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp3
-rw-r--r--llvm/lib/Target/X86/X86ISelDAGToDAG.cpp9
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp13
-rw-r--r--llvm/lib/Target/X86/X86InstrAMX.td2
-rw-r--r--llvm/lib/Target/X86/X86LowerAMXType.cpp16
-rw-r--r--llvm/lib/Target/X86/X86LowerTileCopy.cpp23
-rw-r--r--llvm/lib/Target/X86/X86MachineFunctionInfo.h12
-rw-r--r--llvm/lib/Target/X86/X86SchedIceLake.td18
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeServer.td4
-rw-r--r--llvm/lib/TargetParser/Host.cpp1
-rw-r--r--llvm/lib/TargetParser/TargetParser.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/CMakeLists.txt1
-rw-r--r--llvm/lib/Transforms/IPO/ExpandVariadics.cpp1012
-rw-r--r--llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp251
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp25
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp61
-rw-r--r--llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp56
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemProfiler.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp8
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp57
-rw-r--r--llvm/lib/Transforms/Utils/InlineFunction.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/LoopRotationUtils.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnroll.cpp47
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp17
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/combine-vscale.mir113
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll12
-rw-r--r--llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll112
-rw-r--r--llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll126
-rw-r--r--llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll17
-rw-r--r--llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll20
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll14
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll14
-rw-r--r--llvm/test/CodeGen/AArch64/zext-to-tbl.ll195
-rw-r--r--llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/expand-variadic-call.ll545
-rw-r--r--llvm/test/CodeGen/AMDGPU/fmaximum3.ll600
-rw-r--r--llvm/test/CodeGen/AMDGPU/fminimum3.ll600
-rw-r--r--llvm/test/CodeGen/AMDGPU/llc-pipeline.ll5
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll226
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll3449
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll6331
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll88
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll3449
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll6331
-rw-r--r--llvm/test/CodeGen/AMDGPU/occupancy-levels.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir165
-rw-r--r--llvm/test/CodeGen/AMDGPU/unsupported-calls.ll19
-rw-r--r--llvm/test/CodeGen/ARM/neon_vabd.ll890
-rw-r--r--llvm/test/CodeGen/ARM/vaba.ll193
-rw-r--r--llvm/test/CodeGen/ARM/vabd.ll194
-rw-r--r--llvm/test/CodeGen/LoongArch/emutls.ll6
-rw-r--r--llvm/test/CodeGen/LoongArch/fp16-promote.ll326
-rw-r--r--llvm/test/CodeGen/NVPTX/intr-range.ll88
-rw-r--r--llvm/test/CodeGen/NVPTX/intrinsic-old.ll51
-rw-r--r--llvm/test/CodeGen/PowerPC/toc-data-common.ll136
-rw-r--r--llvm/test/CodeGen/PowerPC/toc-data.ll18
-rw-r--r--llvm/test/CodeGen/PowerPC/tocdata-firm-alignment.ll24
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/legalizer/merge-unmerge-rv32.mir10
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll128
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll144
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll56
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/select-fp.ll188
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll308
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll224
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll80
-rw-r--r--llvm/test/CodeGen/SPIRV/event-wait-ptr-type.ll16
-rw-r--r--llvm/test/CodeGen/SPIRV/passes/SPIRVEmitIntrinsics-no-duplicate-spv_assign_type.ll4
-rw-r--r--llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll36
-rw-r--r--llvm/test/CodeGen/SPIRV/transcoding/check_ro_qualifier.ll2
-rw-r--r--llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll25
-rw-r--r--llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll484
-rw-r--r--llvm/test/CodeGen/WebAssembly/simd-arith.ll220
-rw-r--r--llvm/test/CodeGen/WebAssembly/vararg-frame.ll526
-rw-r--r--llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll79
-rw-r--r--llvm/test/DebugInfo/X86/sdag-order.ll46
-rw-r--r--llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test1
-rw-r--r--llvm/test/Instrumentation/HWAddressSanitizer/stack-safety-analysis.ll23
-rw-r--r--llvm/test/MC/AMDGPU/gfx1150_asm_features.s1
-rw-r--r--llvm/test/MC/RISCV/relocations.s2
-rw-r--r--llvm/test/MC/WebAssembly/reloc-pic64.s3
-rw-r--r--llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml7
-rw-r--r--llvm/test/Other/can-execute.txt1
-rw-r--r--llvm/test/Other/lit-unicode.txt1
-rw-r--r--llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll443
-rw-r--r--llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll232
-rw-r--r--llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll214
-rw-r--r--llvm/test/Transforms/ExpandVariadics/indirect-calls.ll59
-rw-r--r--llvm/test/Transforms/ExpandVariadics/intrinsics.ll120
-rw-r--r--llvm/test/Transforms/ExpandVariadics/invoke.ll89
-rw-r--r--llvm/test/Transforms/ExpandVariadics/pass-byval-byref.ll153
-rw-r--r--llvm/test/Transforms/ExpandVariadics/pass-indirect.ll59
-rw-r--r--llvm/test/Transforms/ExpandVariadics/pass-integers.ll345
-rw-r--r--llvm/test/Transforms/InstCombine/abs-1.ll7
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll3
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll3
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll3
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll3
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll72
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-of-and-x.ll38
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-of-or-x.ll26
-rw-r--r--llvm/test/Transforms/InstCombine/select.ll76
-rw-r--r--llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll6
-rw-r--r--llvm/test/Transforms/LoopUnroll/convergent.controlled.ll562
-rw-r--r--llvm/test/tools/llvm-cov/gcov/intermediate-format.test2
-rw-r--r--llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s82
-rw-r--r--llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s50
-rw-r--r--llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s18
-rw-r--r--llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-ssse3.s50
-rw-r--r--llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s34
-rw-r--r--llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s18
-rw-r--r--llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll5
-rw-r--r--llvm/test/tools/llvm-rc/windres-prefix.test2
-rw-r--r--llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test9
-rw-r--r--llvm/test/tools/llvm-reduce/remove-debug-info-nodes.ll8
-rw-r--r--llvm/test/tools/split-file/output-is-special.test1
-rw-r--r--llvm/tools/llvm-readobj/ELFDumper.cpp1
-rw-r--r--llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp13
-rw-r--r--llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp5
-rw-r--r--llvm/unittests/ProfileData/MemProfTest.cpp107
-rw-r--r--llvm/unittests/Support/VirtualFileSystemTest.cpp5
-rw-r--r--llvm/unittests/TargetParser/Host.cpp3
-rw-r--r--llvm/unittests/TargetParser/TargetParserTest.cpp16
-rw-r--r--llvm/utils/gn/secondary/bolt/lib/Core/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn1
-rw-r--r--llvm/utils/lit/lit/llvm/config.py5
-rw-r--r--mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h38
-rw-r--r--mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td4
-rw-r--r--mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h5
-rw-r--r--mlir/include/mlir/Target/LLVMIR/Export.h9
-rw-r--r--mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp16
-rw-r--r--mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp46
-rw-r--r--mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp15
-rw-r--r--mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp123
-rw-r--r--mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp165
-rw-r--r--mlir/lib/Dialect/Utils/IndexingUtils.cpp2
-rw-r--r--mlir/lib/IR/AsmPrinter.cpp11
-rw-r--r--mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir139
-rw-r--r--mlir/test/Dialect/Tensor/rewrite-as-constant.mlir135
-rw-r--r--mlir/test/IR/array-of-attr.mlir4
-rw-r--r--offload/DeviceRTL/CMakeLists.txt2
-rw-r--r--offload/include/PluginManager.h27
-rw-r--r--offload/plugins-nextgen/amdgpu/src/rtl.cpp29
-rw-r--r--offload/plugins-nextgen/common/include/PluginInterface.h34
-rw-r--r--offload/plugins-nextgen/common/src/PluginInterface.cpp72
-rw-r--r--offload/plugins-nextgen/cuda/src/rtl.cpp55
-rw-r--r--offload/plugins-nextgen/host/src/rtl.cpp4
-rw-r--r--offload/src/PluginManager.cpp235
-rw-r--r--offload/src/omptarget.cpp2
-rw-r--r--offload/test/offloading/ompx_bare_shfl_down_sync.cpp2
-rw-r--r--openmp/CMakeLists.txt6
-rw-r--r--third-party/unittest/googletest/include/gtest/internal/gtest-port.h2
-rw-r--r--utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel1
-rw-r--r--utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel2
-rw-r--r--utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel2
-rw-r--r--utils/bazel/llvm-project-overlay/llvm/BUILD.bazel7
493 files changed, 23079 insertions, 21592 deletions
diff --git a/.github/workflows/containers/github-action-ci/stage1.Dockerfile b/.github/workflows/containers/github-action-ci/stage1.Dockerfile
index fbc4548e6636..8c6bcf463841 100644
--- a/.github/workflows/containers/github-action-ci/stage1.Dockerfile
+++ b/.github/workflows/containers/github-action-ci/stage1.Dockerfile
@@ -37,7 +37,7 @@ RUN cmake -B ./build -G Ninja ./llvm \
-DLLVM_ENABLE_RUNTIMES="compiler-rt" \
-DCMAKE_INSTALL_PREFIX="$LLVM_SYSROOT" \
-DLLVM_ENABLE_PROJECTS="bolt;clang;lld;clang-tools-extra" \
- -DLLVM_DISTRIBUTION_COMPONENTS="lld;compiler-rt;clang-format" \
+ -DLLVM_DISTRIBUTION_COMPONENTS="lld;compiler-rt;clang-format;scan-build" \
-DCLANG_DEFAULT_LINKER="lld" \
-DBOOTSTRAP_CLANG_PGO_TRAINING_DATA_SOURCE_DIR=/llvm-project-llvmorg-$LLVM_VERSION/llvm
diff --git a/bolt/include/bolt/Core/GDBIndex.h b/bolt/include/bolt/Core/GDBIndex.h
new file mode 100644
index 000000000000..6604c2a11472
--- /dev/null
+++ b/bolt/include/bolt/Core/GDBIndex.h
@@ -0,0 +1,61 @@
+//===-- bolt/Core/GDBIndex.h - GDB Index support ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file contains declaration of classes required for generation of
+/// .gdb_index section.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_CORE_GDB_INDEX_H
+#define BOLT_CORE_GDB_INDEX_H
+
+#include "bolt/Core/BinaryContext.h"
+#include <vector>
+
+namespace llvm {
+namespace bolt {
+
+class GDBIndex {
+public:
+ /// Contains information about TU so we can write out correct entries in GDB
+ /// index.
+ struct GDBIndexTUEntry {
+ uint64_t UnitOffset;
+ uint64_t TypeHash;
+ uint64_t TypeDIERelativeOffset;
+ };
+
+private:
+ BinaryContext &BC;
+
+ /// Entries for GDB Index Types CU List.
+ using GDBIndexTUEntryType = std::vector<GDBIndexTUEntry>;
+ GDBIndexTUEntryType GDBIndexTUEntryVector;
+
+public:
+ GDBIndex(BinaryContext &BC) : BC(BC) {}
+
+ std::mutex GDBIndexMutex;
+
+ /// Adds an GDBIndexTUEntry if .gdb_index section exists.
+ void addGDBTypeUnitEntry(const GDBIndexTUEntry &&Entry);
+
+ /// Rewrite .gdb_index section if present.
+ void updateGdbIndexSection(const CUOffsetMap &CUMap, const uint32_t NumCUs,
+ DebugARangesSectionWriter &ARangesSectionWriter);
+
+ /// Returns all entries needed for Types CU list.
+ const GDBIndexTUEntryType &getGDBIndexTUEntryVector() const {
+ return GDBIndexTUEntryVector;
+ }
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt
index 441df9fe0846..873cf67a5629 100644
--- a/bolt/lib/Core/CMakeLists.txt
+++ b/bolt/lib/Core/CMakeLists.txt
@@ -25,6 +25,7 @@ add_llvm_library(LLVMBOLTCore
DynoStats.cpp
Exceptions.cpp
FunctionLayout.cpp
+ GDBIndex.cpp
HashUtilities.cpp
JumpTable.cpp
MCPlusBuilder.cpp
diff --git a/bolt/lib/Core/GDBIndex.cpp b/bolt/lib/Core/GDBIndex.cpp
new file mode 100644
index 000000000000..9e6d24167d55
--- /dev/null
+++ b/bolt/lib/Core/GDBIndex.cpp
@@ -0,0 +1,185 @@
+//===- bolt/Core/GDBIndex.cpp - GDB Index support ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/GDBIndex.h"
+
+using namespace llvm::bolt;
+using namespace llvm::support::endian;
+
+void GDBIndex::addGDBTypeUnitEntry(const GDBIndexTUEntry &&Entry) { // Records a TU entry for later .gdb_index emission.
+  std::lock_guard<std::mutex> Lock(GDBIndexMutex); // Serialize concurrent writers.
+  if (!BC.getGdbIndexSection()) // No .gdb_index in the input: nothing to track.
+    return;
+  GDBIndexTUEntryVector.emplace_back(Entry); // NOTE(review): const&& binds but cannot be moved from — this copies.
+}
+
+void GDBIndex::updateGdbIndexSection(
+    const CUOffsetMap &CUMap, const uint32_t NumCUs,
+    DebugARangesSectionWriter &ARangesSectionWriter) {
+  if (!BC.getGdbIndexSection())
+    return;
+
+  // See https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
+  // for the .gdb_index section format (only versions 7 and 8 are handled).
+
+  StringRef GdbIndexContents = BC.getGdbIndexSection()->getContents();
+
+  const char *Data = GdbIndexContents.data();
+
+  // Parse the header: version, then five little-endian 32-bit offsets.
+  const uint32_t Version = read32le(Data);
+  if (Version != 7 && Version != 8) {
+    errs() << "BOLT-ERROR: can only process .gdb_index versions 7 and 8\n";
+    exit(1);
+  }
+
+  // Some .gdb_index generators use file offsets while others use section
+  // offsets. Hence we can only rely on offsets relative to each other,
+  // and ignore their absolute values.
+  const uint32_t CUListOffset = read32le(Data + 4);
+  const uint32_t CUTypesOffset = read32le(Data + 8);
+  const uint32_t AddressTableOffset = read32le(Data + 12);
+  const uint32_t SymbolTableOffset = read32le(Data + 16);
+  const uint32_t ConstantPoolOffset = read32le(Data + 20);
+  Data += 24;
+
+  // Map CU offsets to indices and verify the existing index table.
+  std::map<uint32_t, uint32_t> OffsetToIndexMap;
+  const uint32_t CUListSize = CUTypesOffset - CUListOffset;
+  const uint32_t TUListSize = AddressTableOffset - CUTypesOffset;
+  const unsigned NUmCUsEncoded = CUListSize / 16; // NOTE(review): typo — should be NumCUsEncoded.
+  unsigned MaxDWARFVersion = BC.DwCtx->getMaxVersion();
+  unsigned NumDWARF5TUs =
+      getGDBIndexTUEntryVector().size() - BC.DwCtx->getNumTypeUnits();
+  bool SkipTypeUnits = false;
+  // For DWARF5, types are in .debug_info.
+  // LLD doesn't generate a Types CU list, and the CU list offset
+  // only includes CUs.
+  // GDB 11+ includes only CUs in the CU list and generates a Types
+  // list.
+  // GDB 9 includes CUs and TUs in the CU list and generates a Types
+  // list. The NumCUs is CUs + TUs, so need to modify the check.
+  // For split-dwarf:
+  // GDB-11, DWARF5: TU units from dwo are not included.
+  // GDB-11, DWARF4: TU units from dwo are included.
+  if (MaxDWARFVersion >= 5)
+    SkipTypeUnits = !TUListSize ? true
+                                : ((NUmCUsEncoded + NumDWARF5TUs) ==
+                                   BC.DwCtx->getNumCompileUnits());
+
+  if (!((CUListSize == NumCUs * 16) ||
+        (CUListSize == (NumCUs + NumDWARF5TUs) * 16))) {
+    errs() << "BOLT-ERROR: .gdb_index: CU count mismatch\n";
+    exit(1);
+  }
+  DenseSet<uint64_t> OriginalOffsets;
+  for (unsigned Index = 0, Units = BC.DwCtx->getNumCompileUnits();
+       Index < Units; ++Index) {
+    const DWARFUnit *CU = BC.DwCtx->getUnitAtIndex(Index);
+    if (SkipTypeUnits && CU->isTypeUnit())
+      continue;
+    const uint64_t Offset = read64le(Data);
+    Data += 16;
+    if (CU->getOffset() != Offset) {
+      errs() << "BOLT-ERROR: .gdb_index CU offset mismatch\n";
+      exit(1);
+    }
+
+    OriginalOffsets.insert(Offset);
+    OffsetToIndexMap[Offset] = Index;
+  }
+
+  // Ignore the old address table; it is fully regenerated below.
+  const uint32_t OldAddressTableSize = SymbolTableOffset - AddressTableOffset;
+  // Move Data to the beginning of the symbol table.
+  Data += SymbolTableOffset - CUTypesOffset;
+
+  // Calculate the size of the new address table: 20 bytes per address range.
+  uint32_t NewAddressTableSize = 0;
+  for (const auto &CURangesPair : ARangesSectionWriter.getCUAddressRanges()) {
+    const SmallVector<DebugAddressRange, 2> &Ranges = CURangesPair.second;
+    NewAddressTableSize += Ranges.size() * 20;
+  }
+
+  // Difference between old and new table (and section) sizes.
+  // Could be negative.
+  int32_t Delta = NewAddressTableSize - OldAddressTableSize;
+
+  size_t NewGdbIndexSize = GdbIndexContents.size() + Delta;
+
+  // Ownership passes to ExecutableFileMemoryManager, which frees it later.
+  auto *NewGdbIndexContents = new uint8_t[NewGdbIndexSize];
+  uint8_t *Buffer = NewGdbIndexContents;
+
+  write32le(Buffer, Version);
+  write32le(Buffer + 4, CUListOffset);
+  write32le(Buffer + 8, CUTypesOffset);
+  write32le(Buffer + 12, AddressTableOffset);
+  write32le(Buffer + 16, SymbolTableOffset + Delta);
+  write32le(Buffer + 20, ConstantPoolOffset + Delta);
+  Buffer += 24;
+
+  using MapEntry = std::pair<uint32_t, CUInfo>;
+  std::vector<MapEntry> CUVector(CUMap.begin(), CUMap.end());
+  // Need to sort, since all TUs in .debug_info are written out before CUs.
+  std::sort(CUVector.begin(), CUVector.end(),
+            [](const MapEntry &E1, const MapEntry &E2) -> bool {
+              return E1.second.Offset < E2.second.Offset;
+            });
+  // Write out the CU list as <Offset, Length> pairs.
+  for (auto &CUInfo : CUVector) {
+    // Skip DWARF5 TUs that are not present in the original CU list.
+    if (!OriginalOffsets.count(CUInfo.first))
+      continue;
+    write64le(Buffer, CUInfo.second.Offset);
+    // The length encoded in the CU excludes the initial 4 bytes of the
+    write64le(Buffer + 8, CUInfo.second.Length + 4); // length field itself, so add them back.
+    Buffer += 16;
+  }
+
+  // Rewrite the TU list, since abbrevs can be different.
+  // Entry example:
+  //   0: offset = 0x00000000, type_offset = 0x0000001e,
+  //      type_signature = 0x418503b8111e9a7b
+  // The spec says the triplet is <CU offset, type offset in the CU, type
+  // signature>. However, gdb-add-index emits <TU offset, offset of the type
+  // DIE relative to the TU, type signature>, so the same layout is written
+  // here. Advancing by sizeof(GDBIndexTUEntry) assumes it is exactly 24 bytes.
+  if (TUListSize)
+    for (const GDBIndexTUEntry &Entry : getGDBIndexTUEntryVector()) {
+      write64le(Buffer, Entry.UnitOffset);
+      write64le(Buffer + 8, Entry.TypeDIERelativeOffset);
+      write64le(Buffer + 16, Entry.TypeHash);
+      Buffer += sizeof(GDBIndexTUEntry);
+    }
+
+  // Generate the new address table: one <LowPC, HighPC, CUIndex> per range.
+  for (const std::pair<const uint64_t, DebugAddressRangesVector> &CURangesPair :
+       ARangesSectionWriter.getCUAddressRanges()) {
+    const uint32_t CUIndex = OffsetToIndexMap[CURangesPair.first];
+    const DebugAddressRangesVector &Ranges = CURangesPair.second;
+    for (const DebugAddressRange &Range : Ranges) {
+      write64le(Buffer, Range.LowPC);
+      write64le(Buffer + 8, Range.HighPC);
+      write32le(Buffer + 16, CUIndex);
+      Buffer += 20;
+    }
+  }
+
+  const size_t TrailingSize =
+      GdbIndexContents.data() + GdbIndexContents.size() - Data;
+  assert(Buffer + TrailingSize == NewGdbIndexContents + NewGdbIndexSize &&
+         "size calculation error");
+
+  // Copy over the rest of the original data (symbol table and constant pool).
+  memcpy(Buffer, Data, TrailingSize);
+
+  // Register the new section contents with the binary context.
+  BC.registerOrUpdateNoteSection(".gdb_index", NewGdbIndexContents,
+                                 NewGdbIndexSize);
+}
diff --git a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt
index 35e29b9a7d13..36fcd8fc1b27 100644
--- a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt
@@ -43,7 +43,6 @@ add_clang_library(clangTidyMiscModule
UseAnonymousNamespaceCheck.cpp
LINK_LIBS
- clangAnalysis
clangTidy
clangTidyUtils
diff --git a/clang-tools-extra/clang-tidy/misc/HeaderIncludeCycleCheck.cpp b/clang-tools-extra/clang-tidy/misc/HeaderIncludeCycleCheck.cpp
index fadfdc869d37..37bc577c646a 100644
--- a/clang-tools-extra/clang-tidy/misc/HeaderIncludeCycleCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/HeaderIncludeCycleCheck.cpp
@@ -139,8 +139,10 @@ public:
auto CurrentIt = Files.rbegin();
do {
- Check.diag(CurrentIt->Loc, "'%0' included from here", DiagnosticIDs::Note)
- << CurrentIt->Name;
+ if (CurrentIt->Loc.isValid())
+ Check.diag(CurrentIt->Loc, "'%0' included from here",
+ DiagnosticIDs::Note)
+ << CurrentIt->Name;
} while (CurrentIt++ != It);
}
diff --git a/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp b/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp
index bbc1b47b97ae..bf7a847dff10 100644
--- a/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp
@@ -96,9 +96,14 @@ AST_MATCHER(QualType, isIntegralType) {
AST_MATCHER_P(UserDefinedLiteral, hasLiteral,
clang::ast_matchers::internal::Matcher<Expr>, InnerMatcher) {
- if (const Expr *CookedLiteral = Node.getCookedLiteral()) {
+ const UserDefinedLiteral::LiteralOperatorKind LOK =
+ Node.getLiteralOperatorKind();
+ if (LOK == UserDefinedLiteral::LOK_Template ||
+ LOK == UserDefinedLiteral::LOK_Raw)
+ return false;
+
+ if (const Expr *CookedLiteral = Node.getCookedLiteral())
return InnerMatcher.matches(*CookedLiteral, Finder, Builder);
- }
return false;
}
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 6947cf06f6e5..da30aceb8d49 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -317,6 +317,10 @@ Changes in existing checks
Additionally, the option `UseHeaderFileExtensions` is removed, so that the
check uses the `HeaderFileExtensions` option unconditionally.
+- Improved :doc:`misc-header-include-cycle
+  <clang-tidy/checks/misc/header-include-cycle>` check by avoiding a crash for
+  self-include cycles.
+
- Improved :doc:`misc-unused-using-decls
<clang-tidy/checks/misc/unused-using-decls>` check by replacing the local
option `HeaderFileExtensions` by the global option of the same name.
@@ -376,6 +380,7 @@ Changes in existing checks
- Improved :doc:`readability-container-size-empty
<clang-tidy/checks/readability/container-size-empty>` check to prevent false
positives when utilizing ``size`` or ``length`` methods that accept parameter.
+  Fixed a crash when facing template user-defined literals.
- Improved :doc:`readability-duplicate-include
<clang-tidy/checks/readability/duplicate-include>` check by excluding include
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/header-include-cycle.self.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/header-include-cycle.self.cpp
new file mode 100644
index 000000000000..245dd0a65a8b
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc/header-include-cycle.self.cpp
@@ -0,0 +1,3 @@
+// RUN: not clang-tidy %s -checks='-*,misc-header-include-cycle'
+
+#include "header-include-cycle.self.cpp"
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/container-size-empty.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/container-size-empty.cpp
index ecaf97fa348c..46755270b48e 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/container-size-empty.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/container-size-empty.cpp
@@ -889,3 +889,9 @@ namespace PR88203 {
// CHECK-FIXES: {{^ }}if (s.empty()) {}{{$}}
}
}
+
+namespace PR94454 {
+ template <char...>
+ int operator""_ci() { return 0; }
+ auto eq = 0_ci == 0;
+}
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 46f99d0bbdd0..a49e4122ffc1 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4016,6 +4016,30 @@ Note that the `size` argument must be a compile time constant.
Note that this intrinsic cannot yet be called in a ``constexpr`` context.
+``__is_bitwise_cloneable``
+--------------------------
+
+A type trait that checks whether a type can be safely copied by ``memcpy``.
+
+**Syntax**:
+
+.. code-block:: c++
+
+ bool __is_bitwise_cloneable(Type)
+
+**Description**:
+
+Objects of bitwise cloneable types can be bitwise copied by memcpy/memmove. The
+Clang compiler warrants that this behavior is well defined, and won't be
+broken by compiler optimizations and sanitizers.
+
+For implicit-lifetime types, the lifetime of the new object is implicitly
+started after the copy. For other types (e.g., classes with virtual methods),
+the lifetime isn't started, and using the object results in undefined behavior
+according to the C++ Standard.
+
+This builtin can be used in constant expressions.
+
Atomic Min/Max builtins with memory ordering
--------------------------------------------
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 69ac08133c9f..b9c9070fcb22 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -340,6 +340,9 @@ Non-comprehensive list of changes in this release
``-Winvalid-constexpr`` is not enabled for the function definition, which
should result in mild compile-time performance improvements.
+- Added ``__is_bitwise_cloneable`` which is used to check whether a type
+ can be safely copied by memcpy/memmove.
+
New Compiler Flags
------------------
- ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 263b632df23c..9eb3f6c09e3d 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -1120,6 +1120,20 @@ public:
/// Return true if this is a trivially copyable type (C++0x [basic.types]p9)
bool isTriviallyCopyableType(const ASTContext &Context) const;
+ /// Return true if the type is safe to bitwise copy using memcpy/memmove.
+ ///
+ /// This is an extension in clang: bitwise cloneable types act as trivially
+ /// copyable types, meaning their underlying bytes can be safely copied by
+ /// memcpy or memmove. After the copy, the destination object has the same
+ /// object representation.
+ ///
+ /// However, there are cases where it is not safe to copy:
+ /// - When sanitizers, such as AddressSanitizer, add padding with poison,
+ /// which can cause issues if those poisoned padding bits are accessed.
+ /// - Types with Objective-C lifetimes, where specific runtime
+ /// semantics may not be preserved during a bitwise copy.
+ bool isBitwiseCloneableType(const ASTContext &Context) const;
+
/// Return true if this is a trivially copyable type
bool isTriviallyCopyConstructibleType(const ASTContext &Context) const;
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 433c7795325f..9e6800ea814a 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -240,7 +240,7 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "at
TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst")
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts")
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2hV2h*3V2h", "t", "atomic-ds-pk-add-16-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3UiiUi", "t", "gfx940-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx940-insts")
//===----------------------------------------------------------------------===//
// Deep learning builtins.
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index d15171d959c4..0d5e38e825aa 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -124,6 +124,7 @@ enum class CudaArch {
GFX1103,
GFX1150,
GFX1151,
+ GFX1152,
GFX12_GENERIC,
GFX1200,
GFX1201,
diff --git a/clang/include/clang/Basic/DiagnosticOptions.h b/clang/include/clang/Basic/DiagnosticOptions.h
index 099982c3bdd5..30141c2b8f44 100644
--- a/clang/include/clang/Basic/DiagnosticOptions.h
+++ b/clang/include/clang/Basic/DiagnosticOptions.h
@@ -124,7 +124,7 @@ public:
/// default).
std::vector<std::string> VerifyPrefixes;
- /// The list of -Wsystem-header-in-module=... options used to override
+ /// The list of -Wsystem-headers-in-module=... options used to override
/// whether -Wsystem-headers is enabled on a per-module basis.
std::vector<std::string> SystemHeaderWarningsModules;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 87745140cb0e..9f0b6f5a3638 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10082,6 +10082,12 @@ def warn_new_dangling_initializer_list : Warning<
"the allocated initializer list}0 "
"will be destroyed at the end of the full-expression">,
InGroup<DanglingInitializerList>;
+def warn_unsupported_lifetime_extension : Warning<
+ "lifetime extension of "
+ "%select{temporary|backing array of initializer list}0 created "
+ "by aggregate initialization using a default member initializer "
+ "is not yet supported; lifetime of %select{temporary|backing array}0 "
+ "will end at the end of the full-expression">, InGroup<Dangling>;
// For non-floating point, expressions of the form x == x or x != x
// should result in a warning, since these always evaluate to a constant.
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index b5a0e9df9f7a..9c4b17465e18 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -542,6 +542,8 @@ TYPE_TRAIT_2(__reference_converts_from_temporary, ReferenceConvertsFromTemporary
// is not exposed to users.
TYPE_TRAIT_2(/*EmptySpellingName*/, IsDeducible, KEYCXX)
+TYPE_TRAIT_1(__is_bitwise_cloneable, IsBitwiseCloneable, KEYALL)
+
// Embarcadero Expression Traits
EXPRESSION_TRAIT(__is_lvalue_expr, IsLValueExpr, KEYCXX)
EXPRESSION_TRAIT(__is_rvalue_expr, IsRValueExpr, KEYCXX)
diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index cca4367751b9..a0820e2093bc 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -2637,7 +2637,8 @@ let UnMaskedPolicyScheme = HasPassthruOperand in {
defm vbrev : RVVOutBuiltinSetZvbb;
defm vclz : RVVOutBuiltinSetZvbb;
defm vctz : RVVOutBuiltinSetZvbb;
- defm vcpopv : RVVOutBuiltinSetZvbb;
+ let IRName = "vcpopv", MaskedIRName = "vcpopv_mask" in
+ defm vcpop : RVVOutBuiltinSetZvbb;
let OverloadedName = "vwsll" in
defm vwsll : RVVSignedWidenBinBuiltinSetVwsll;
}
diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt
index 3faefb54f599..a5d3dacfc1a8 100644
--- a/clang/lib/AST/CMakeLists.txt
+++ b/clang/lib/AST/CMakeLists.txt
@@ -87,6 +87,7 @@ add_clang_library(clangAST
Interp/Record.cpp
Interp/Source.cpp
Interp/State.cpp
+ Interp/MemberPointer.cpp
Interp/InterpShared.cpp
ItaniumCXXABI.cpp
ItaniumMangle.cpp
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 3671c41ae703..d124248a3605 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -100,6 +100,35 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
return this->emitMemcpy(CE);
}
+ case CK_DerivedToBaseMemberPointer: {
+ assert(classifyPrim(CE->getType()) == PT_MemberPtr);
+ assert(classifyPrim(SubExpr->getType()) == PT_MemberPtr);
+ const auto *FromMP = SubExpr->getType()->getAs<MemberPointerType>();
+ const auto *ToMP = CE->getType()->getAs<MemberPointerType>();
+
+ unsigned DerivedOffset = collectBaseOffset(QualType(ToMP->getClass(), 0),
+ QualType(FromMP->getClass(), 0));
+
+ if (!this->visit(SubExpr))
+ return false;
+
+ return this->emitGetMemberPtrBasePop(DerivedOffset, CE);
+ }
+
+ case CK_BaseToDerivedMemberPointer: {
+ assert(classifyPrim(CE) == PT_MemberPtr);
+ assert(classifyPrim(SubExpr) == PT_MemberPtr);
+ const auto *FromMP = SubExpr->getType()->getAs<MemberPointerType>();
+ const auto *ToMP = CE->getType()->getAs<MemberPointerType>();
+
+ unsigned DerivedOffset = collectBaseOffset(QualType(FromMP->getClass(), 0),
+ QualType(ToMP->getClass(), 0));
+
+ if (!this->visit(SubExpr))
+ return false;
+ return this->emitGetMemberPtrBasePop(-DerivedOffset, CE);
+ }
+
case CK_UncheckedDerivedToBase:
case CK_DerivedToBase: {
if (!this->visit(SubExpr))
@@ -187,7 +216,8 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
return this->emitCastFloatingIntegral(*ToT, CE);
}
- case CK_NullToPointer: {
+ case CK_NullToPointer:
+ case CK_NullToMemberPointer: {
if (DiscardResult)
return true;
@@ -326,7 +356,8 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
return this->emitCast(*FromT, *ToT, CE);
}
- case CK_PointerToBoolean: {
+ case CK_PointerToBoolean:
+ case CK_MemberPointerToBoolean: {
PrimType PtrT = classifyPrim(SubExpr->getType());
// Just emit p != nullptr for this.
@@ -534,8 +565,23 @@ bool ByteCodeExprGen<Emitter>::VisitBinaryOperator(const BinaryOperator *BO) {
BO->isComparisonOp())
return this->emitComplexComparison(LHS, RHS, BO);
- if (BO->isPtrMemOp())
- return this->visit(RHS);
+ if (BO->isPtrMemOp()) {
+ if (!this->visit(LHS))
+ return false;
+
+ if (!this->visit(RHS))
+ return false;
+
+ if (!this->emitToMemberPtr(BO))
+ return false;
+
+ if (classifyPrim(BO) == PT_MemberPtr)
+ return true;
+
+ if (!this->emitCastMemberPtrPtr(BO))
+ return false;
+ return DiscardResult ? this->emitPopPtr(BO) : true;
+ }
// Typecheck the args.
std::optional<PrimType> LT = classify(LHS->getType());
@@ -2773,6 +2819,8 @@ bool ByteCodeExprGen<Emitter>::visitZeroInitializer(PrimType T, QualType QT,
return this->emitNullPtr(nullptr, E);
case PT_FnPtr:
return this->emitNullFnPtr(nullptr, E);
+ case PT_MemberPtr:
+ return this->emitNullMemberPtr(nullptr, E);
case PT_Float: {
return this->emitConstFloat(APFloat::getZero(Ctx.getFloatSemantics(QT)), E);
}
@@ -2875,6 +2923,7 @@ bool ByteCodeExprGen<Emitter>::emitConst(T Value, PrimType Ty, const Expr *E) {
return this->emitConstBool(Value, E);
case PT_Ptr:
case PT_FnPtr:
+ case PT_MemberPtr:
case PT_Float:
case PT_IntAP:
case PT_IntAPS:
@@ -3188,7 +3237,7 @@ bool ByteCodeExprGen<Emitter>::visitAPValueInitializer(const APValue &Val,
const APValue &F = Val.getStructField(I);
const Record::Field *RF = R->getField(I);
- if (F.isInt()) {
+ if (F.isInt() || F.isLValue()) {
PrimType T = classifyPrim(RF->Decl->getType());
if (!this->visitAPValue(F, T, E))
return false;
@@ -3308,10 +3357,27 @@ bool ByteCodeExprGen<Emitter>::VisitCallExpr(const CallExpr *E) {
}
}
+ std::optional<unsigned> CalleeOffset;
// Add the (optional, implicit) This pointer.
if (const auto *MC = dyn_cast<CXXMemberCallExpr>(E)) {
- if (!this->visit(MC->getImplicitObjectArgument()))
+ if (!FuncDecl && classifyPrim(E->getCallee()) == PT_MemberPtr) {
+ // If we end up creating a CallPtr op for this, we need the base of the
+ // member pointer as the instance pointer, and later extract the function
+ // decl as the function pointer.
+ const Expr *Callee = E->getCallee();
+ CalleeOffset =
+ this->allocateLocalPrimitive(Callee, PT_MemberPtr, true, false);
+ if (!this->visit(Callee))
+ return false;
+ if (!this->emitSetLocal(PT_MemberPtr, *CalleeOffset, E))
+ return false;
+ if (!this->emitGetLocal(PT_MemberPtr, *CalleeOffset, E))
+ return false;
+ if (!this->emitGetMemberPtrBase(E))
+ return false;
+ } else if (!this->visit(MC->getImplicitObjectArgument())) {
return false;
+ }
}
llvm::BitVector NonNullArgs = collectNonNullArgs(FuncDecl, Args);
@@ -3380,11 +3446,22 @@ bool ByteCodeExprGen<Emitter>::VisitCallExpr(const CallExpr *E) {
for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
ArgSize += align(primSize(classify(E->getArg(I)).value_or(PT_Ptr)));
- if (!this->visit(E->getCallee()))
- return false;
+ // Get the callee, either from a member pointer saved in CalleeOffset,
+ // or by just visiting the Callee expr.
+ if (CalleeOffset) {
+ if (!this->emitGetLocal(PT_MemberPtr, *CalleeOffset, E))
+ return false;
+ if (!this->emitGetMemberPtrDecl(E))
+ return false;
+ if (!this->emitCallPtr(ArgSize, E, E))
+ return false;
+ } else {
+ if (!this->visit(E->getCallee()))
+ return false;
- if (!this->emitCallPtr(ArgSize, E, E))
- return false;
+ if (!this->emitCallPtr(ArgSize, E, E))
+ return false;
+ }
}
// Cleanup for discarded return values.
@@ -3623,6 +3700,11 @@ bool ByteCodeExprGen<Emitter>::VisitUnaryOperator(const UnaryOperator *E) {
return false;
return DiscardResult ? this->emitPop(*T, E) : true;
case UO_AddrOf: // &x
+ if (E->getType()->isMemberPointerType()) {
+ // C++11 [expr.unary.op]p3 has very strict rules on how the address of a
+ // member can be formed.
+ return this->emitGetMemberPtr(cast<DeclRefExpr>(SubExpr)->getDecl(), E);
+ }
// We should already have a pointer when we get here.
return this->delegate(SubExpr);
case UO_Deref: // *x
diff --git a/clang/lib/AST/Interp/Context.cpp b/clang/lib/AST/Interp/Context.cpp
index b0b22b059b77..98d1837204eb 100644
--- a/clang/lib/AST/Interp/Context.cpp
+++ b/clang/lib/AST/Interp/Context.cpp
@@ -163,8 +163,12 @@ std::optional<PrimType> Context::classify(QualType T) const {
if (T->isFloatingType())
return PT_Float;
+ if (T->isSpecificBuiltinType(BuiltinType::BoundMember) ||
+ T->isMemberPointerType())
+ return PT_MemberPtr;
+
if (T->isFunctionPointerType() || T->isFunctionReferenceType() ||
- T->isFunctionType() || T->isSpecificBuiltinType(BuiltinType::BoundMember))
+ T->isFunctionType())
return PT_FnPtr;
if (T->isReferenceType() || T->isPointerType() ||
@@ -177,9 +181,6 @@ std::optional<PrimType> Context::classify(QualType T) const {
if (const auto *DT = dyn_cast<DecltypeType>(T))
return classify(DT->getUnderlyingType());
- if (const auto *DT = dyn_cast<MemberPointerType>(T))
- return classify(DT->getPointeeType());
-
return std::nullopt;
}
@@ -292,10 +293,12 @@ unsigned Context::collectBaseOffset(const RecordDecl *BaseDecl,
}
if (CurDecl == FinalDecl)
break;
-
- // break;
}
assert(OffsetSum > 0);
return OffsetSum;
}
+
+const Record *Context::getRecord(const RecordDecl *D) const {
+ return P->getOrCreateRecord(D);
+}
diff --git a/clang/lib/AST/Interp/Context.h b/clang/lib/AST/Interp/Context.h
index 360e9499d084..c78dc9a2a471 100644
--- a/clang/lib/AST/Interp/Context.h
+++ b/clang/lib/AST/Interp/Context.h
@@ -107,6 +107,8 @@ public:
unsigned collectBaseOffset(const RecordDecl *BaseDecl,
const RecordDecl *DerivedDecl) const;
+ const Record *getRecord(const RecordDecl *D) const;
+
private:
/// Runs a function.
bool Run(State &Parent, const Function *Func, APValue &Result);
diff --git a/clang/lib/AST/Interp/Descriptor.cpp b/clang/lib/AST/Interp/Descriptor.cpp
index 746b765ca421..d20ab1340c89 100644
--- a/clang/lib/AST/Interp/Descriptor.cpp
+++ b/clang/lib/AST/Interp/Descriptor.cpp
@@ -11,6 +11,7 @@
#include "Floating.h"
#include "FunctionPointer.h"
#include "IntegralAP.h"
+#include "MemberPointer.h"
#include "Pointer.h"
#include "PrimType.h"
#include "Record.h"
diff --git a/clang/lib/AST/Interp/Disasm.cpp b/clang/lib/AST/Interp/Disasm.cpp
index 3f8a92ed2f0b..0ab84d159c58 100644
--- a/clang/lib/AST/Interp/Disasm.cpp
+++ b/clang/lib/AST/Interp/Disasm.cpp
@@ -19,6 +19,7 @@
#include "Integral.h"
#include "IntegralAP.h"
#include "InterpFrame.h"
+#include "MemberPointer.h"
#include "Opcode.h"
#include "PrimType.h"
#include "Program.h"
@@ -122,6 +123,8 @@ static const char *primTypeToString(PrimType T) {
return "Ptr";
case PT_FnPtr:
return "FnPtr";
+ case PT_MemberPtr:
+ return "MemberPtr";
}
llvm_unreachable("Unhandled PrimType");
}
diff --git a/clang/lib/AST/Interp/Function.cpp b/clang/lib/AST/Interp/Function.cpp
index 1d04998d5dd1..00f5a1fced53 100644
--- a/clang/lib/AST/Interp/Function.cpp
+++ b/clang/lib/AST/Interp/Function.cpp
@@ -40,7 +40,8 @@ SourceInfo Function::getSource(CodePtr PC) const {
unsigned Offset = PC - getCodeBegin();
using Elem = std::pair<unsigned, SourceInfo>;
auto It = llvm::lower_bound(SrcMap, Elem{Offset, {}}, llvm::less_first());
- assert(It != SrcMap.end());
+ if (It == SrcMap.end())
+ return SrcMap.back().second;
return It->second;
}
diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp
index 145fa65791da..49015b1dd63d 100644
--- a/clang/lib/AST/Interp/Interp.cpp
+++ b/clang/lib/AST/Interp/Interp.cpp
@@ -373,6 +373,26 @@ bool CheckSubobject(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
return false;
}
+bool CheckDowncast(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
+ uint32_t Offset) {
+ uint32_t MinOffset = Ptr.getDeclDesc()->getMetadataSize();
+ uint32_t PtrOffset = Ptr.getByteOffset();
+
+ // We subtract Offset from PtrOffset. The result must be at least
+ // MinOffset.
+ if (Offset < PtrOffset && (PtrOffset - Offset) >= MinOffset)
+ return true;
+
+ const auto *E = cast<CastExpr>(S.Current->getExpr(OpPC));
+ QualType TargetQT = E->getType()->getPointeeType();
+ QualType MostDerivedQT = Ptr.getDeclPtr().getType();
+
+ S.CCEDiag(E, diag::note_constexpr_invalid_downcast)
+ << MostDerivedQT << TargetQT;
+
+ return false;
+}
+
bool CheckConst(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
assert(Ptr.isLive() && "Pointer is not live");
if (!Ptr.isConst())
@@ -493,10 +513,12 @@ bool CheckStore(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
bool CheckInvoke(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
if (!CheckLive(S, OpPC, Ptr, AK_MemberCall))
return false;
- if (!CheckExtern(S, OpPC, Ptr))
- return false;
- if (!CheckRange(S, OpPC, Ptr, AK_MemberCall))
- return false;
+ if (!Ptr.isDummy()) {
+ if (!CheckExtern(S, OpPC, Ptr))
+ return false;
+ if (!CheckRange(S, OpPC, Ptr, AK_MemberCall))
+ return false;
+ }
return true;
}
@@ -516,7 +538,7 @@ bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
return false;
}
- if (!F->isConstexpr()) {
+ if (!F->isConstexpr() || !F->hasBody()) {
const SourceLocation &Loc = S.Current->getLocation(OpPC);
if (S.getLangOpts().CPlusPlus11) {
const FunctionDecl *DiagDecl = F->getDecl();
@@ -550,9 +572,10 @@ bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
S.checkingPotentialConstantExpression())
return false;
- // If the declaration is defined _and_ declared 'constexpr', the below
- // diagnostic doesn't add anything useful.
- if (DiagDecl->isDefined() && DiagDecl->isConstexpr())
+ // If the declaration is defined, declared 'constexpr' _and_ has a body,
+ // the below diagnostic doesn't add anything useful.
+ if (DiagDecl->isDefined() && DiagDecl->isConstexpr() &&
+ DiagDecl->hasBody())
return false;
S.FFDiag(Loc, diag::note_constexpr_invalid_function, 1)
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index eca1792e6471..98caea5c7014 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -20,6 +20,7 @@
#include "InterpFrame.h"
#include "InterpStack.h"
#include "InterpState.h"
+#include "MemberPointer.h"
#include "Opcode.h"
#include "PrimType.h"
#include "Program.h"
@@ -75,6 +76,11 @@ bool CheckRange(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
bool CheckSubobject(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
CheckSubobjectKind CSK);
+/// Checks if the downcast using the given offset is possible with the given
+/// pointer.
+bool CheckDowncast(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
+ uint32_t Offset);
+
/// Checks if a pointer points to const storage.
bool CheckConst(InterpState &S, CodePtr OpPC, const Pointer &Ptr);
@@ -725,6 +731,9 @@ using CompareFn = llvm::function_ref<bool(ComparisonCategoryResult)>;
template <typename T>
bool CmpHelper(InterpState &S, CodePtr OpPC, CompareFn Fn) {
+ assert((!std::is_same_v<T, MemberPointer>) &&
+ "Non-equality comparisons on member pointer types should already be "
+ "rejected in Sema.");
using BoolT = PrimConv<PT_Bool>::T;
const T &RHS = S.Stk.pop<T>();
const T &LHS = S.Stk.pop<T>();
@@ -834,6 +843,47 @@ inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
}
}
+template <>
+inline bool CmpHelperEQ<MemberPointer>(InterpState &S, CodePtr OpPC,
+ CompareFn Fn) {
+ const auto &RHS = S.Stk.pop<MemberPointer>();
+ const auto &LHS = S.Stk.pop<MemberPointer>();
+
+ // If either operand is a pointer to a weak function, the comparison is not
+ // constant.
+ for (const auto &MP : {LHS, RHS}) {
+ if (const CXXMethodDecl *MD = MP.getMemberFunction(); MD && MD->isWeak()) {
+ const SourceInfo &Loc = S.Current->getSource(OpPC);
+ S.FFDiag(Loc, diag::note_constexpr_mem_pointer_weak_comparison) << MD;
+ return false;
+ }
+ }
+
+ // C++11 [expr.eq]p2:
+ // If both operands are null, they compare equal. Otherwise if only one is
+ // null, they compare unequal.
+ if (LHS.isZero() && RHS.isZero()) {
+ S.Stk.push<Boolean>(Fn(ComparisonCategoryResult::Equal));
+ return true;
+ }
+ if (LHS.isZero() || RHS.isZero()) {
+ S.Stk.push<Boolean>(Fn(ComparisonCategoryResult::Unordered));
+ return true;
+ }
+
+ // We cannot compare against virtual declarations at compile time.
+ for (const auto &MP : {LHS, RHS}) {
+ if (const CXXMethodDecl *MD = MP.getMemberFunction();
+ MD && MD->isVirtual()) {
+ const SourceInfo &Loc = S.Current->getSource(OpPC);
+ S.CCEDiag(Loc, diag::note_constexpr_compare_virtual_mem_ptr) << MD;
+ }
+ }
+
+ S.Stk.push<Boolean>(Boolean::from(Fn(LHS.compare(RHS))));
+ return true;
+}
+
template <PrimType Name, class T = typename PrimConv<Name>::T>
bool EQ(InterpState &S, CodePtr OpPC) {
return CmpHelperEQ<T>(S, OpPC, [](ComparisonCategoryResult R) {
@@ -1300,6 +1350,9 @@ inline bool GetPtrDerivedPop(InterpState &S, CodePtr OpPC, uint32_t Off) {
return false;
if (!CheckSubobject(S, OpPC, Ptr, CSK_Derived))
return false;
+ if (!CheckDowncast(S, OpPC, Ptr, Off))
+ return false;
+
S.Stk.push<Pointer>(Ptr.atFieldSub(Off));
return true;
}
@@ -1324,6 +1377,12 @@ inline bool GetPtrBasePop(InterpState &S, CodePtr OpPC, uint32_t Off) {
return true;
}
+inline bool GetMemberPtrBasePop(InterpState &S, CodePtr OpPC, int32_t Off) {
+ const auto &Ptr = S.Stk.pop<MemberPointer>();
+ S.Stk.push<MemberPointer>(Ptr.atInstanceBase(Off));
+ return true;
+}
+
inline bool GetPtrThisBase(InterpState &S, CodePtr OpPC, uint32_t Off) {
if (S.checkingPotentialConstantExpression())
return false;
@@ -1532,6 +1591,24 @@ inline bool Memcpy(InterpState &S, CodePtr OpPC) {
return DoMemcpy(S, OpPC, Src, Dest);
}
+inline bool ToMemberPtr(InterpState &S, CodePtr OpPC) {
+ const auto &Member = S.Stk.pop<MemberPointer>();
+ const auto &Base = S.Stk.pop<Pointer>();
+
+ S.Stk.push<MemberPointer>(Member.takeInstance(Base));
+ return true;
+}
+
+inline bool CastMemberPtrPtr(InterpState &S, CodePtr OpPC) {
+ const auto &MP = S.Stk.pop<MemberPointer>();
+
+ if (std::optional<Pointer> Ptr = MP.toPointer(S.Ctx)) {
+ S.Stk.push<Pointer>(*Ptr);
+ return true;
+ }
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// AddOffset, SubOffset
//===----------------------------------------------------------------------===//
@@ -1696,8 +1773,10 @@ inline bool SubPtr(InterpState &S, CodePtr OpPC) {
return true;
}
- T A = T::from(LHS.getIndex());
- T B = T::from(RHS.getIndex());
+ T A = LHS.isElementPastEnd() ? T::from(LHS.getNumElems())
+ : T::from(LHS.getIndex());
+ T B = RHS.isElementPastEnd() ? T::from(RHS.getNumElems())
+ : T::from(RHS.getIndex());
return AddSubMulHelper<T, T::sub, std::minus>(S, OpPC, A.bitWidth(), A, B);
}
@@ -2115,7 +2194,7 @@ inline bool ArrayDecay(InterpState &S, CodePtr OpPC) {
if (!CheckRange(S, OpPC, Ptr, CSK_ArrayToPointer))
return false;
- if (!Ptr.isUnknownSizeArray() || Ptr.isDummy()) {
+ if (Ptr.isRoot() || !Ptr.isUnknownSizeArray() || Ptr.isDummy()) {
S.Stk.push<Pointer>(Ptr.atIndex(0));
return true;
}
@@ -2329,6 +2408,28 @@ inline bool GetIntPtr(InterpState &S, CodePtr OpPC, const Descriptor *Desc) {
return true;
}
+inline bool GetMemberPtr(InterpState &S, CodePtr OpPC, const Decl *D) {
+ S.Stk.push<MemberPointer>(D);
+ return true;
+}
+
+inline bool GetMemberPtrBase(InterpState &S, CodePtr OpPC) {
+ const auto &MP = S.Stk.pop<MemberPointer>();
+
+ S.Stk.push<Pointer>(MP.getBase());
+ return true;
+}
+
+inline bool GetMemberPtrDecl(InterpState &S, CodePtr OpPC) {
+ const auto &MP = S.Stk.pop<MemberPointer>();
+
+ const auto *FD = cast<FunctionDecl>(MP.getDecl());
+ const auto *Func = S.getContext().getOrCreateFunction(FD);
+
+ S.Stk.push<FunctionPointer>(Func);
+ return true;
+}
+
/// Just emit a diagnostic. The expression that caused emission of this
/// op is not valid in a constant context.
inline bool Invalid(InterpState &S, CodePtr OpPC) {
diff --git a/clang/lib/AST/Interp/InterpFrame.cpp b/clang/lib/AST/Interp/InterpFrame.cpp
index 51b0bd5c1551..54ccf9034c7a 100644
--- a/clang/lib/AST/Interp/InterpFrame.cpp
+++ b/clang/lib/AST/Interp/InterpFrame.cpp
@@ -12,6 +12,7 @@
#include "Function.h"
#include "InterpStack.h"
#include "InterpState.h"
+#include "MemberPointer.h"
#include "Pointer.h"
#include "PrimType.h"
#include "Program.h"
diff --git a/clang/lib/AST/Interp/InterpStack.cpp b/clang/lib/AST/Interp/InterpStack.cpp
index 91fe40feb767..c7024740d322 100644
--- a/clang/lib/AST/Interp/InterpStack.cpp
+++ b/clang/lib/AST/Interp/InterpStack.cpp
@@ -10,6 +10,7 @@
#include "Boolean.h"
#include "Floating.h"
#include "Integral.h"
+#include "MemberPointer.h"
#include "Pointer.h"
#include <cassert>
#include <cstdlib>
diff --git a/clang/lib/AST/Interp/InterpStack.h b/clang/lib/AST/Interp/InterpStack.h
index 3fd0f63c781f..9d85503b851b 100644
--- a/clang/lib/AST/Interp/InterpStack.h
+++ b/clang/lib/AST/Interp/InterpStack.h
@@ -15,6 +15,7 @@
#include "FunctionPointer.h"
#include "IntegralAP.h"
+#include "MemberPointer.h"
#include "PrimType.h"
#include <memory>
#include <vector>
@@ -188,6 +189,8 @@ private:
return PT_IntAP;
else if constexpr (std::is_same_v<T, IntegralAP<false>>)
return PT_IntAP;
+ else if constexpr (std::is_same_v<T, MemberPointer>)
+ return PT_MemberPtr;
llvm_unreachable("unknown type push()'ed into InterpStack");
}
diff --git a/clang/lib/AST/Interp/MemberPointer.cpp b/clang/lib/AST/Interp/MemberPointer.cpp
new file mode 100644
index 000000000000..96f63643e83c
--- /dev/null
+++ b/clang/lib/AST/Interp/MemberPointer.cpp
@@ -0,0 +1,76 @@
+//===------------------------- MemberPointer.cpp ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MemberPointer.h"
+#include "Context.h"
+#include "FunctionPointer.h"
+#include "Program.h"
+#include "Record.h"
+
+namespace clang {
+namespace interp {
+
+std::optional<Pointer> MemberPointer::toPointer(const Context &Ctx) const {
+ if (!Dcl || isa<FunctionDecl>(Dcl))
+ return Base;
+ const FieldDecl *FD = cast<FieldDecl>(Dcl);
+ assert(FD);
+
+ if (!Base.isBlockPointer())
+ return std::nullopt;
+
+ Pointer CastedBase =
+ (PtrOffset < 0 ? Base.atField(-PtrOffset) : Base.atFieldSub(PtrOffset));
+
+ const Record *BaseRecord = CastedBase.getRecord();
+ if (!BaseRecord)
+ return std::nullopt;
+
+ assert(BaseRecord);
+ if (FD->getParent() == BaseRecord->getDecl())
+ return CastedBase.atField(BaseRecord->getField(FD)->Offset);
+
+ const RecordDecl *FieldParent = FD->getParent();
+ const Record *FieldRecord = Ctx.getRecord(FieldParent);
+
+ unsigned Offset = 0;
+ Offset += FieldRecord->getField(FD)->Offset;
+ Offset += CastedBase.block()->getDescriptor()->getMetadataSize();
+
+ if (Offset > CastedBase.block()->getSize())
+ return std::nullopt;
+
+ if (const RecordDecl *BaseDecl = Base.getDeclPtr().getRecord()->getDecl();
+ BaseDecl != FieldParent)
+ Offset += Ctx.collectBaseOffset(FieldParent, BaseDecl);
+
+ if (Offset > CastedBase.block()->getSize())
+ return std::nullopt;
+
+ assert(Offset <= CastedBase.block()->getSize());
+ return Pointer(const_cast<Block *>(Base.block()), Offset, Offset);
+}
+
+FunctionPointer MemberPointer::toFunctionPointer(const Context &Ctx) const {
+ return FunctionPointer(Ctx.getProgram().getFunction(cast<FunctionDecl>(Dcl)));
+}
+
+APValue MemberPointer::toAPValue() const {
+ if (isZero())
+ return APValue(static_cast<ValueDecl *>(nullptr), /*IsDerivedMember=*/false,
+ /*Path=*/{});
+
+ if (hasBase())
+ return Base.toAPValue();
+
+ return APValue(cast<ValueDecl>(getDecl()), /*IsDerivedMember=*/false,
+ /*Path=*/{});
+}
+
+} // namespace interp
+} // namespace clang
diff --git a/clang/lib/AST/Interp/MemberPointer.h b/clang/lib/AST/Interp/MemberPointer.h
new file mode 100644
index 000000000000..5c61f6a43957
--- /dev/null
+++ b/clang/lib/AST/Interp/MemberPointer.h
@@ -0,0 +1,112 @@
+//===------------------------- MemberPointer.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_INTERP_MEMBER_POINTER_H
+#define LLVM_CLANG_AST_INTERP_MEMBER_POINTER_H
+
+#include "Pointer.h"
+#include <optional>
+
+namespace clang {
+class ASTContext;
+namespace interp {
+
+class Context;
+class FunctionPointer;
+
+class MemberPointer final {
+private:
+ Pointer Base;
+ const Decl *Dcl = nullptr;
+ int32_t PtrOffset = 0;
+
+ MemberPointer(Pointer Base, const Decl *Dcl, int32_t PtrOffset)
+ : Base(Base), Dcl(Dcl), PtrOffset(PtrOffset) {}
+
+public:
+ MemberPointer() = default;
+ MemberPointer(Pointer Base, const Decl *Dcl) : Base(Base), Dcl(Dcl) {}
+ MemberPointer(uint32_t Address, const Descriptor *D) {
+ // We only reach this for Address == 0, when creating a null member pointer.
+ assert(Address == 0);
+ }
+
+ MemberPointer(const Decl *D) : Dcl(D) {
+ assert((isa<FieldDecl, IndirectFieldDecl, CXXMethodDecl>(D)));
+ }
+
+ uint64_t getIntegerRepresentation() const {
+ assert(
+ false &&
+ "getIntegerRepresentation() shouldn't be reachable for MemberPointers");
+ return 17;
+ }
+
+ std::optional<Pointer> toPointer(const Context &Ctx) const;
+
+ FunctionPointer toFunctionPointer(const Context &Ctx) const;
+
+ Pointer getBase() const {
+ if (PtrOffset < 0)
+ return Base.atField(-PtrOffset);
+ return Base.atFieldSub(PtrOffset);
+ }
+ bool isMemberFunctionPointer() const {
+ return isa_and_nonnull<CXXMethodDecl>(Dcl);
+ }
+ const CXXMethodDecl *getMemberFunction() const {
+ return dyn_cast_if_present<CXXMethodDecl>(Dcl);
+ }
+ const FieldDecl *getField() const {
+ return dyn_cast_if_present<FieldDecl>(Dcl);
+ }
+
+ bool hasDecl() const { return Dcl; }
+ const Decl *getDecl() const { return Dcl; }
+
+ MemberPointer atInstanceBase(unsigned Offset) const {
+ if (Base.isZero())
+ return MemberPointer(Base, Dcl, Offset);
+ return MemberPointer(this->Base, Dcl, Offset + PtrOffset);
+ }
+
+ MemberPointer takeInstance(Pointer Instance) const {
+ assert(this->Base.isZero());
+ return MemberPointer(Instance, this->Dcl, this->PtrOffset);
+ }
+
+ APValue toAPValue() const;
+
+ bool isZero() const { return Base.isZero() && !Dcl; }
+ bool hasBase() const { return !Base.isZero(); }
+
+ void print(llvm::raw_ostream &OS) const {
+ OS << "MemberPtr(" << Base << " " << (void *)Dcl << " + " << PtrOffset
+ << ")";
+ }
+
+ std::string toDiagnosticString(const ASTContext &Ctx) const {
+ return "FIXME";
+ }
+
+ ComparisonCategoryResult compare(const MemberPointer &RHS) const {
+ if (this->Dcl == RHS.Dcl)
+ return ComparisonCategoryResult::Equal;
+ return ComparisonCategoryResult::Unordered;
+ }
+};
+
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, MemberPointer FP) {
+ FP.print(OS);
+ return OS;
+}
+
+} // namespace interp
+} // namespace clang
+
+#endif
diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td
index cfbd7f93c32d..cb4f299c8d51 100644
--- a/clang/lib/AST/Interp/Opcodes.td
+++ b/clang/lib/AST/Interp/Opcodes.td
@@ -30,6 +30,7 @@ def IntAPS : Type;
def Float : Type;
def Ptr : Type;
def FnPtr : Type;
+def MemberPtr : Type;
//===----------------------------------------------------------------------===//
// Types transferred to the interpreter.
@@ -61,6 +62,7 @@ def ArgOffsetOfExpr : ArgType { let Name = "const OffsetOfExpr *"; }
def ArgDeclRef : ArgType { let Name = "const DeclRefExpr *"; }
def ArgDesc : ArgType { let Name = "const Descriptor *"; }
def ArgCCI : ArgType { let Name = "const ComparisonCategoryInfo *"; }
+def ArgDecl : ArgType { let Name = "const Decl*"; }
//===----------------------------------------------------------------------===//
// Classes of types instructions operate on.
@@ -93,7 +95,7 @@ def AluTypeClass : TypeClass {
}
def PtrTypeClass : TypeClass {
- let Types = [Ptr, FnPtr];
+ let Types = [Ptr, FnPtr, MemberPtr];
}
def BoolTypeClass : TypeClass {
@@ -208,7 +210,6 @@ def CallBI : Opcode {
def CallPtr : Opcode {
let Args = [ArgUint32, ArgCallExpr];
- let Types = [];
}
def CallVar : Opcode {
@@ -327,6 +328,11 @@ def GetPtrBasePop : Opcode {
// Offset of field, which is a base.
let Args = [ArgUint32];
}
+def GetMemberPtrBasePop : Opcode {
+ // Offset of field, which is a base.
+ let Args = [ArgSint32];
+}
+
def FinishInitPop : Opcode;
def FinishInit : Opcode;
@@ -751,6 +757,14 @@ def CheckNonNullArg : Opcode {
def Memcpy : Opcode;
+def ToMemberPtr : Opcode;
+def CastMemberPtrPtr : Opcode;
+def GetMemberPtr : Opcode {
+ let Args = [ArgDecl];
+}
+def GetMemberPtrBase : Opcode;
+def GetMemberPtrDecl : Opcode;
+
//===----------------------------------------------------------------------===//
// Debugging.
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/AST/Interp/Pointer.cpp b/clang/lib/AST/Interp/Pointer.cpp
index 252f7ea46086..a60b4d28b438 100644
--- a/clang/lib/AST/Interp/Pointer.cpp
+++ b/clang/lib/AST/Interp/Pointer.cpp
@@ -13,6 +13,7 @@
#include "Function.h"
#include "Integral.h"
#include "InterpBlock.h"
+#include "MemberPointer.h"
#include "PrimType.h"
#include "Record.h"
diff --git a/clang/lib/AST/Interp/Pointer.h b/clang/lib/AST/Interp/Pointer.h
index 93ca754d04a6..c6e4f4d0b4ab 100644
--- a/clang/lib/AST/Interp/Pointer.h
+++ b/clang/lib/AST/Interp/Pointer.h
@@ -620,6 +620,7 @@ public:
private:
friend class Block;
friend class DeadBlock;
+ friend class MemberPointer;
friend struct InitMap;
Pointer(Block *Pointee, unsigned Base, uint64_t Offset);
diff --git a/clang/lib/AST/Interp/PrimType.cpp b/clang/lib/AST/Interp/PrimType.cpp
index 9b96dcfe6a27..3054e67d5c49 100644
--- a/clang/lib/AST/Interp/PrimType.cpp
+++ b/clang/lib/AST/Interp/PrimType.cpp
@@ -11,6 +11,7 @@
#include "Floating.h"
#include "FunctionPointer.h"
#include "IntegralAP.h"
+#include "MemberPointer.h"
#include "Pointer.h"
using namespace clang;
diff --git a/clang/lib/AST/Interp/PrimType.h b/clang/lib/AST/Interp/PrimType.h
index 604fb5dfde1e..20fb5e81774d 100644
--- a/clang/lib/AST/Interp/PrimType.h
+++ b/clang/lib/AST/Interp/PrimType.h
@@ -25,6 +25,7 @@ class Pointer;
class Boolean;
class Floating;
class FunctionPointer;
+class MemberPointer;
template <bool Signed> class IntegralAP;
template <unsigned Bits, bool Signed> class Integral;
@@ -44,10 +45,11 @@ enum PrimType : unsigned {
PT_Float = 11,
PT_Ptr = 12,
PT_FnPtr = 13,
+ PT_MemberPtr = 14,
};
inline constexpr bool isPtrType(PrimType T) {
- return T == PT_Ptr || T == PT_FnPtr;
+ return T == PT_Ptr || T == PT_FnPtr || T == PT_MemberPtr;
}
enum class CastKind : uint8_t {
@@ -91,6 +93,9 @@ template <> struct PrimConv<PT_Ptr> { using T = Pointer; };
template <> struct PrimConv<PT_FnPtr> {
using T = FunctionPointer;
};
+template <> struct PrimConv<PT_MemberPtr> {
+ using T = MemberPointer;
+};
/// Returns the size of a primitive type in bytes.
size_t primSize(PrimType Type);
@@ -131,6 +136,7 @@ static inline bool aligned(const void *P) {
TYPE_SWITCH_CASE(PT_Bool, B) \
TYPE_SWITCH_CASE(PT_Ptr, B) \
TYPE_SWITCH_CASE(PT_FnPtr, B) \
+ TYPE_SWITCH_CASE(PT_MemberPtr, B) \
} \
} while (0)
diff --git a/clang/lib/AST/ParentMap.cpp b/clang/lib/AST/ParentMap.cpp
index 534793b837bb..3d6a1cc84c7b 100644
--- a/clang/lib/AST/ParentMap.cpp
+++ b/clang/lib/AST/ParentMap.cpp
@@ -97,22 +97,6 @@ static void BuildParentMap(MapTy& M, Stmt* S,
BuildParentMap(M, SubStmt, OVMode);
}
break;
- case Stmt::CXXDefaultArgExprClass:
- if (auto *Arg = dyn_cast<CXXDefaultArgExpr>(S)) {
- if (Arg->hasRewrittenInit()) {
- M[Arg->getExpr()] = S;
- BuildParentMap(M, Arg->getExpr(), OVMode);
- }
- }
- break;
- case Stmt::CXXDefaultInitExprClass:
- if (auto *Init = dyn_cast<CXXDefaultInitExpr>(S)) {
- if (Init->hasRewrittenInit()) {
- M[Init->getExpr()] = S;
- BuildParentMap(M, Init->getExpr(), OVMode);
- }
- }
- break;
default:
for (Stmt *SubStmt : S->children()) {
if (SubStmt) {
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index 8bacceea0f22..1076dcd40a69 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -958,6 +958,9 @@ void TextNodeDumper::dumpTemplateArgument(const TemplateArgument &TA) {
}
OS << " '" << Str << "'";
+ if (!Context)
+ return;
+
if (TemplateArgument CanonTA = Context->getCanonicalTemplateArgument(TA);
!CanonTA.structurallyEquals(TA)) {
llvm::SmallString<128> CanonStr;
@@ -1139,15 +1142,17 @@ void TextNodeDumper::dumpTemplateName(TemplateName TN, StringRef Label) {
}
OS << " '" << Str << "'";
- if (TemplateName CanonTN = Context->getCanonicalTemplateName(TN);
- CanonTN != TN) {
- llvm::SmallString<128> CanonStr;
- {
- llvm::raw_svector_ostream SS(CanonStr);
- CanonTN.print(SS, PrintPolicy);
+ if (Context) {
+ if (TemplateName CanonTN = Context->getCanonicalTemplateName(TN);
+ CanonTN != TN) {
+ llvm::SmallString<128> CanonStr;
+ {
+ llvm::raw_svector_ostream SS(CanonStr);
+ CanonTN.print(SS, PrintPolicy);
+ }
+ if (CanonStr != Str)
+ OS << ":'" << CanonStr << "'";
}
- if (CanonStr != Str)
- OS << ":'" << CanonStr << "'";
}
}
dumpBareTemplateName(TN);
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 2097b29b7e0b..33acae2cbafa 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2749,6 +2749,43 @@ bool QualType::isTriviallyCopyableType(const ASTContext &Context) const {
/*IsCopyConstructible=*/false);
}
+// FIXME: each call will trigger a full computation, cache the result.
+bool QualType::isBitwiseCloneableType(const ASTContext &Context) const {
+ auto CanonicalType = getCanonicalType();
+ if (CanonicalType.hasNonTrivialObjCLifetime())
+ return false;
+ if (CanonicalType->isArrayType())
+ return Context.getBaseElementType(CanonicalType)
+ .isBitwiseCloneableType(Context);
+
+ if (CanonicalType->isIncompleteType())
+ return false;
+ const auto *RD = CanonicalType->getAsRecordDecl(); // struct/union/class
+ if (!RD)
+ return true;
+
+ // Never allow memcpy when we're adding poisoned padding bits to the struct.
+ // Accessing these posioned bits will trigger false alarms on
+ // SanitizeAddressFieldPadding etc.
+ if (RD->mayInsertExtraPadding())
+ return false;
+
+ for (auto *const Field : RD->fields()) {
+ if (!Field->getType().isBitwiseCloneableType(Context))
+ return false;
+ }
+
+ if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+ for (auto Base : CXXRD->bases())
+ if (!Base.getType().isBitwiseCloneableType(Context))
+ return false;
+ for (auto VBase : CXXRD->vbases())
+ if (!VBase.getType().isBitwiseCloneableType(Context))
+ return false;
+ }
+ return true;
+}
+
bool QualType::isTriviallyCopyConstructibleType(
const ASTContext &Context) const {
return isTriviallyCopyableTypeImpl(*this, Context,
@@ -4444,7 +4481,6 @@ static CachedProperties computeCachedProperties(const Type *T) {
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class,Base) case Type::Class:
#include "clang/AST/TypeNodes.inc"
// Treat instantiation-dependent types as external.
- if (!T->isInstantiationDependentType()) T->dump();
assert(T->isInstantiationDependentType());
return CachedProperties(Linkage::External, false);
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index 02317257c274..64e6155de090 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -556,10 +556,6 @@ public:
private:
// Visitors to walk an AST and construct the CFG.
- CFGBlock *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *Default,
- AddStmtChoice asc);
- CFGBlock *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *Default,
- AddStmtChoice asc);
CFGBlock *VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc);
CFGBlock *VisitAddrLabelExpr(AddrLabelExpr *A, AddStmtChoice asc);
CFGBlock *VisitAttributedStmt(AttributedStmt *A, AddStmtChoice asc);
@@ -2258,10 +2254,16 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc,
asc, ExternallyDestructed);
case Stmt::CXXDefaultArgExprClass:
- return VisitCXXDefaultArgExpr(cast<CXXDefaultArgExpr>(S), asc);
-
case Stmt::CXXDefaultInitExprClass:
- return VisitCXXDefaultInitExpr(cast<CXXDefaultInitExpr>(S), asc);
+ // FIXME: The expression inside a CXXDefaultArgExpr is owned by the
+ // called function's declaration, not by the caller. If we simply add
+ // this expression to the CFG, we could end up with the same Expr
+ // appearing multiple times (PR13385).
+ //
+ // It's likewise possible for multiple CXXDefaultInitExprs for the same
+ // expression to be used in the same function (through aggregate
+ // initialization).
+ return VisitStmt(S, asc);
case Stmt::CXXBindTemporaryExprClass:
return VisitCXXBindTemporaryExpr(cast<CXXBindTemporaryExpr>(S), asc);
@@ -2431,40 +2433,6 @@ CFGBlock *CFGBuilder::VisitChildren(Stmt *S) {
return B;
}
-CFGBlock *CFGBuilder::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *Arg,
- AddStmtChoice asc) {
- if (Arg->hasRewrittenInit()) {
- if (asc.alwaysAdd(*this, Arg)) {
- autoCreateBlock();
- appendStmt(Block, Arg);
- }
- return VisitStmt(Arg->getExpr(), asc);
- }
-
- // We can't add the default argument if it's not rewritten because the
- // expression inside a CXXDefaultArgExpr is owned by the called function's
- // declaration, not by the caller, we could end up with the same expression
- // appearing multiple times.
- return VisitStmt(Arg, asc);
-}
-
-CFGBlock *CFGBuilder::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *Init,
- AddStmtChoice asc) {
- if (Init->hasRewrittenInit()) {
- if (asc.alwaysAdd(*this, Init)) {
- autoCreateBlock();
- appendStmt(Block, Init);
- }
- return VisitStmt(Init->getExpr(), asc);
- }
-
- // We can't add the default initializer if it's not rewritten because multiple
- // CXXDefaultInitExprs for the same sub-expression to be used in the same
- // function (through aggregate initialization). we could end up with the same
- // expression appearing multiple times.
- return VisitStmt(Init, asc);
-}
-
CFGBlock *CFGBuilder::VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc) {
if (asc.alwaysAdd(*this, ILE)) {
autoCreateBlock();
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index e2609b9573cc..1d96a929f95d 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -144,6 +144,7 @@ static const CudaArchToStringMap arch_names[] = {
GFX(1103), // gfx1103
GFX(1150), // gfx1150
GFX(1151), // gfx1151
+ GFX(1152), // gfx1152
{CudaArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
GFX(1200), // gfx1200
GFX(1201), // gfx1201
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h
index 68572843f2d7..5fc223483951 100644
--- a/clang/lib/Basic/Targets/LoongArch.h
+++ b/clang/lib/Basic/Targets/LoongArch.h
@@ -133,7 +133,7 @@ public:
LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
IntMaxType = Int64Type = SignedLong;
HasUnalignedAccess = true;
- resetDataLayout("e-m:e-p:64:64-i64:64-i128:128-n64-S128");
+ resetDataLayout("e-m:e-p:64:64-i64:64-i128:128-n32:64-S128");
// TODO: select appropriate ABI.
setABI("lp64d");
}
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index fc6ef1119e9c..ff7d2f1f92aa 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -228,6 +228,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case CudaArch::GFX1103:
case CudaArch::GFX1150:
case CudaArch::GFX1151:
+ case CudaArch::GFX1152:
case CudaArch::GFX12_GENERIC:
case CudaArch::GFX1200:
case CudaArch::GFX1201:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 191bd757831f..6e9a1bacd9bf 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -3537,6 +3537,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::GFX1103:
case CudaArch::GFX1150:
case CudaArch::GFX1151:
+ case CudaArch::GFX1152:
case CudaArch::GFX12_GENERIC:
case CudaArch::GFX1200:
case CudaArch::GFX1201:
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index d1ff8b4b62f1..057f6ef40c51 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -120,7 +120,11 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
- llvm_unreachable("AMDGPU does not support varargs");
+ const bool IsIndirect = false;
+ const bool AllowHigherAlign = false;
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
+ getContext().getTypeInfoInChars(Ty),
+ CharUnits::fromQuantity(4), AllowHigherAlign);
}
ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp
index 08e711cafae2..6e56ee5b573f 100644
--- a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp
+++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp
@@ -31,7 +31,6 @@
using namespace clang;
using namespace clang::extractapi;
using namespace llvm;
-using namespace llvm::json;
namespace {
@@ -1036,9 +1035,9 @@ void SymbolGraphSerializer::serializeGraphToStream(
ExtendedModule &&EM) {
Object Root = serializeGraph(ModuleName, std::move(EM));
if (Options.Compact)
- OS << formatv("{0}", Value(std::move(Root))) << "\n";
+ OS << formatv("{0}", json::Value(std::move(Root))) << "\n";
else
- OS << formatv("{0:2}", Value(std::move(Root))) << "\n";
+ OS << formatv("{0:2}", json::Value(std::move(Root))) << "\n";
}
void SymbolGraphSerializer::serializeMainSymbolGraph(
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 6b9fbfe0ebf5..be684ac71cd6 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -1712,7 +1712,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
(!Previous || Previous->isNot(tok::kw_return) ||
(Style.Language != FormatStyle::LK_Java && PrecedenceLevel > 0)) &&
(Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign ||
- PrecedenceLevel != prec::Comma || Current.NestingLevel == 0) &&
+ PrecedenceLevel > prec::Comma || Current.NestingLevel == 0) &&
(!Style.isTableGen() ||
(Previous && Previous->isOneOf(TT_TableGenDAGArgListComma,
TT_TableGenDAGArgListCommaToBreak)))) {
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 683f87e8c8c7..7a9527891427 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -42,6 +42,9 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Host.h"
+
+#include <cstdarg>
+
using namespace clang;
// FIXME: Figure out how to unify with namespace init_convenience from
@@ -270,14 +273,10 @@ Interpreter::~Interpreter() {
// can't find the precise resource directory in unittests so we have to hard
// code them.
const char *const Runtimes = R"(
+ #define __CLANG_REPL__ 1
#ifdef __cplusplus
+ #define EXTERN_C extern "C"
void *__clang_Interpreter_SetValueWithAlloc(void*, void*, void*);
- void __clang_Interpreter_SetValueNoAlloc(void*, void*, void*);
- void __clang_Interpreter_SetValueNoAlloc(void*, void*, void*, void*);
- void __clang_Interpreter_SetValueNoAlloc(void*, void*, void*, float);
- void __clang_Interpreter_SetValueNoAlloc(void*, void*, void*, double);
- void __clang_Interpreter_SetValueNoAlloc(void*, void*, void*, long double);
- void __clang_Interpreter_SetValueNoAlloc(void*,void*,void*,unsigned long long);
struct __clang_Interpreter_NewTag{} __ci_newtag;
void* operator new(__SIZE_TYPE__, void* __p, __clang_Interpreter_NewTag) noexcept;
template <class T, class = T (*)() /*disable for arrays*/>
@@ -289,7 +288,11 @@ const char *const Runtimes = R"(
void __clang_Interpreter_SetValueCopyArr(const T (*Src)[N], void* Placement, unsigned long Size) {
__clang_Interpreter_SetValueCopyArr(Src[0], Placement, Size);
}
+#else
+ #define EXTERN_C extern
#endif // __cplusplus
+
+ EXTERN_C void __clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal, void *OpaqueType, ...);
)";
llvm::Expected<std::unique_ptr<Interpreter>>
@@ -588,15 +591,17 @@ std::unique_ptr<RuntimeInterfaceBuilder> Interpreter::FindRuntimeInterface() {
if (!LookupInterface(ValuePrintingInfo[NoAlloc],
MagicRuntimeInterface[NoAlloc]))
return nullptr;
- if (!LookupInterface(ValuePrintingInfo[WithAlloc],
- MagicRuntimeInterface[WithAlloc]))
- return nullptr;
- if (!LookupInterface(ValuePrintingInfo[CopyArray],
- MagicRuntimeInterface[CopyArray]))
- return nullptr;
- if (!LookupInterface(ValuePrintingInfo[NewTag],
- MagicRuntimeInterface[NewTag]))
- return nullptr;
+ if (Ctx.getLangOpts().CPlusPlus) {
+ if (!LookupInterface(ValuePrintingInfo[WithAlloc],
+ MagicRuntimeInterface[WithAlloc]))
+ return nullptr;
+ if (!LookupInterface(ValuePrintingInfo[CopyArray],
+ MagicRuntimeInterface[CopyArray]))
+ return nullptr;
+ if (!LookupInterface(ValuePrintingInfo[NewTag],
+ MagicRuntimeInterface[NewTag]))
+ return nullptr;
+ }
return createInProcessRuntimeInterfaceBuilder(*this, Ctx, S);
}
@@ -855,69 +860,81 @@ __clang_Interpreter_SetValueWithAlloc(void *This, void *OutVal,
return VRef.getPtr();
}
-// Pointers, lvalue struct that can take as a reference.
-REPL_EXTERNAL_VISIBILITY void
-__clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal, void *OpaqueType,
- void *Val) {
+extern "C" void REPL_EXTERNAL_VISIBILITY __clang_Interpreter_SetValueNoAlloc(
+ void *This, void *OutVal, void *OpaqueType, ...) {
Value &VRef = *(Value *)OutVal;
- VRef = Value(static_cast<Interpreter *>(This), OpaqueType);
- VRef.setPtr(Val);
-}
+ Interpreter *I = static_cast<Interpreter *>(This);
+ VRef = Value(I, OpaqueType);
+ if (VRef.isVoid())
+ return;
-REPL_EXTERNAL_VISIBILITY void
-__clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal,
- void *OpaqueType) {
- Value &VRef = *(Value *)OutVal;
- VRef = Value(static_cast<Interpreter *>(This), OpaqueType);
-}
+ va_list args;
+ va_start(args, /*last named param*/ OpaqueType);
-static void SetValueDataBasedOnQualType(Value &V, unsigned long long Data) {
- QualType QT = V.getType();
- if (const auto *ET = QT->getAs<EnumType>())
- QT = ET->getDecl()->getIntegerType();
-
- switch (QT->castAs<BuiltinType>()->getKind()) {
- default:
- llvm_unreachable("unknown type kind!");
-#define X(type, name) \
- case BuiltinType::name: \
- V.set##name(Data); \
- break;
- REPL_BUILTIN_TYPES
-#undef X
+ QualType QT = VRef.getType();
+ if (VRef.getKind() == Value::K_PtrOrObj) {
+ VRef.setPtr(va_arg(args, void *));
+ } else {
+ if (const auto *ET = QT->getAs<EnumType>())
+ QT = ET->getDecl()->getIntegerType();
+ switch (QT->castAs<BuiltinType>()->getKind()) {
+ default:
+ llvm_unreachable("unknown type kind!");
+ break;
+ // Types shorter than int are resolved as int, else va_arg has UB.
+ case BuiltinType::Bool:
+ VRef.setBool(va_arg(args, int));
+ break;
+ case BuiltinType::Char_S:
+ VRef.setChar_S(va_arg(args, int));
+ break;
+ case BuiltinType::SChar:
+ VRef.setSChar(va_arg(args, int));
+ break;
+ case BuiltinType::Char_U:
+ VRef.setChar_U(va_arg(args, unsigned));
+ break;
+ case BuiltinType::UChar:
+ VRef.setUChar(va_arg(args, unsigned));
+ break;
+ case BuiltinType::Short:
+ VRef.setShort(va_arg(args, int));
+ break;
+ case BuiltinType::UShort:
+ VRef.setUShort(va_arg(args, unsigned));
+ break;
+ case BuiltinType::Int:
+ VRef.setInt(va_arg(args, int));
+ break;
+ case BuiltinType::UInt:
+ VRef.setUInt(va_arg(args, unsigned));
+ break;
+ case BuiltinType::Long:
+ VRef.setLong(va_arg(args, long));
+ break;
+ case BuiltinType::ULong:
+ VRef.setULong(va_arg(args, unsigned long));
+ break;
+ case BuiltinType::LongLong:
+ VRef.setLongLong(va_arg(args, long long));
+ break;
+ case BuiltinType::ULongLong:
+ VRef.setULongLong(va_arg(args, unsigned long long));
+ break;
+ // Types shorter than double are resolved as double, else va_arg has UB.
+ case BuiltinType::Float:
+ VRef.setFloat(va_arg(args, double));
+ break;
+ case BuiltinType::Double:
+ VRef.setDouble(va_arg(args, double));
+ break;
+ case BuiltinType::LongDouble:
+ VRef.setLongDouble(va_arg(args, long double));
+ break;
+ // See REPL_BUILTIN_TYPES.
+ }
}
-}
-
-REPL_EXTERNAL_VISIBILITY void
-__clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal, void *OpaqueType,
- unsigned long long Val) {
- Value &VRef = *(Value *)OutVal;
- VRef = Value(static_cast<Interpreter *>(This), OpaqueType);
- SetValueDataBasedOnQualType(VRef, Val);
-}
-
-REPL_EXTERNAL_VISIBILITY void
-__clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal, void *OpaqueType,
- float Val) {
- Value &VRef = *(Value *)OutVal;
- VRef = Value(static_cast<Interpreter *>(This), OpaqueType);
- VRef.setFloat(Val);
-}
-
-REPL_EXTERNAL_VISIBILITY void
-__clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal, void *OpaqueType,
- double Val) {
- Value &VRef = *(Value *)OutVal;
- VRef = Value(static_cast<Interpreter *>(This), OpaqueType);
- VRef.setDouble(Val);
-}
-
-REPL_EXTERNAL_VISIBILITY void
-__clang_Interpreter_SetValueNoAlloc(void *This, void *OutVal, void *OpaqueType,
- long double Val) {
- Value &VRef = *(Value *)OutVal;
- VRef = Value(static_cast<Interpreter *>(This), OpaqueType);
- VRef.setLongDouble(Val);
+ va_end(args);
}
// A trampoline to work around the fact that operator placement new cannot
diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp
index c25203243ee4..16a5b7483ec1 100644
--- a/clang/lib/Parse/ParseStmt.cpp
+++ b/clang/lib/Parse/ParseStmt.cpp
@@ -571,11 +571,8 @@ StmtResult Parser::ParseExprStatement(ParsedStmtContext StmtCtx) {
}
Token *CurTok = nullptr;
- // If the semicolon is missing at the end of REPL input, consider if
- // we want to do value printing. Note this is only enabled in C++ mode
- // since part of the implementation requires C++ language features.
// Note we shouldn't eat the token since the callback needs it.
- if (Tok.is(tok::annot_repl_input_end) && Actions.getLangOpts().CPlusPlus)
+ if (Tok.is(tok::annot_repl_input_end))
CurTok = &Tok;
else
// Otherwise, eat the semicolon.
diff --git a/clang/lib/Sema/Scope.cpp b/clang/lib/Sema/Scope.cpp
index c08073e80ff3..5bc7e79a6818 100644
--- a/clang/lib/Sema/Scope.cpp
+++ b/clang/lib/Sema/Scope.cpp
@@ -228,7 +228,11 @@ void Scope::dumpImpl(raw_ostream &OS) const {
{CompoundStmtScope, "CompoundStmtScope"},
{ClassInheritanceScope, "ClassInheritanceScope"},
{CatchScope, "CatchScope"},
+ {ConditionVarScope, "ConditionVarScope"},
+ {OpenMPOrderClauseScope, "OpenMPOrderClauseScope"},
+ {LambdaScope, "LambdaScope"},
{OpenACCComputeConstructScope, "OpenACCComputeConstructScope"},
+ {TypeAliasScope, "TypeAliasScope"},
{FriendScope, "FriendScope"},
};
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index c446cc1d042a..d11bc9eec330 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -31,9 +31,9 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
constexpr const int SizeIdx = 2;
llvm::APSInt Size;
Expr *ArgExpr = TheCall->getArg(SizeIdx);
- ExprResult R = SemaRef.VerifyIntegerConstantExpression(ArgExpr, &Size);
- if (R.isInvalid())
- return true;
+ [[maybe_unused]] ExprResult R =
+ SemaRef.VerifyIntegerConstantExpression(ArgExpr, &Size);
+ assert(!R.isInvalid());
switch (Size.getSExtValue()) {
case 1:
case 2:
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index fb5ca199b3fc..76145f291887 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -5572,9 +5572,10 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
Res = Immediate.TransformInitializer(Param->getInit(),
/*NotCopy=*/false);
});
- if (Res.isUsable())
- Res = ConvertParamDefaultArgument(Param, Res.get(),
- Res.get()->getBeginLoc());
+ if (Res.isInvalid())
+ return ExprError();
+ Res = ConvertParamDefaultArgument(Param, Res.get(),
+ Res.get()->getBeginLoc());
if (Res.isInvalid())
return ExprError();
Init = Res.get();
@@ -5608,10 +5609,9 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
InitializationContext.emplace(Loc, Field, CurContext);
Expr *Init = nullptr;
- bool HasRewrittenInit = false;
bool NestedDefaultChecking = isCheckingDefaultArgumentOrInitializer();
- bool InLifetimeExtendingContext = isInLifetimeExtendingContext();
+
EnterExpressionEvaluationContext EvalContext(
*this, ExpressionEvaluationContext::PotentiallyEvaluated, Field);
@@ -5646,36 +5646,19 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
ImmediateCallVisitor V(getASTContext());
if (!NestedDefaultChecking)
V.TraverseDecl(Field);
-
- // CWG1815
- // Support lifetime extension of temporary created by aggregate
- // initialization using a default member initializer. We should always rebuild
- // the initializer if it contains any temporaries (if the initializer
- // expression is an ExprWithCleanups). Then make sure the normal lifetime
- // extension code recurses into the default initializer and does lifetime
- // extension when warranted.
- bool ContainsAnyTemporaries =
- isa_and_present<ExprWithCleanups>(Field->getInClassInitializer());
- if (V.HasImmediateCalls || InLifetimeExtendingContext ||
- ContainsAnyTemporaries) {
- HasRewrittenInit = true;
+ if (V.HasImmediateCalls) {
ExprEvalContexts.back().DelayedDefaultInitializationContext = {Loc, Field,
CurContext};
ExprEvalContexts.back().IsCurrentlyCheckingDefaultArgumentOrInitializer =
NestedDefaultChecking;
- // Pass down lifetime extending flag, and collect temporaries in
- // CreateMaterializeTemporaryExpr when we rewrite the call argument.
- keepInLifetimeExtendingContext();
+
EnsureImmediateInvocationInDefaultArgs Immediate(*this);
ExprResult Res;
-
- // Rebuild CXXDefaultInitExpr might cause diagnostics.
- SFINAETrap Trap(*this);
runWithSufficientStackSpace(Loc, [&] {
Res = Immediate.TransformInitializer(Field->getInClassInitializer(),
/*CXXDirectInit=*/false);
});
- if (Res.isUsable())
+ if (!Res.isInvalid())
Res = ConvertMemberDefaultInitExpression(Field, Res.get(), Loc);
if (Res.isInvalid()) {
Field->setInvalidDecl();
@@ -5702,7 +5685,7 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
return CXXDefaultInitExpr::Create(Context, InitializationContext->Loc,
Field, InitializationContext->Context,
- HasRewrittenInit ? Init : nullptr);
+ Init);
}
// DR1351:
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 4487c618862c..cf461a68d552 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1555,6 +1555,9 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
bool ListInitialization) {
QualType Ty = TInfo->getType();
SourceLocation TyBeginLoc = TInfo->getTypeLoc().getBeginLoc();
+
+ assert((!ListInitialization || Exprs.size() == 1) &&
+ "List initialization must have exactly one expression.");
SourceRange FullRange = SourceRange(TyBeginLoc, RParenOrBraceLoc);
InitializedEntity Entity =
@@ -5126,6 +5129,7 @@ static bool CheckUnaryTypeTraitTypeCompleteness(Sema &S, TypeTrait UTT,
case UTT_IsStandardLayout:
case UTT_IsPOD:
case UTT_IsLiteral:
+ case UTT_IsBitwiseCloneable:
// By analogy, is_trivially_relocatable and is_trivially_equality_comparable
// impose the same constraints.
case UTT_IsTriviallyRelocatable:
@@ -5619,6 +5623,8 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT,
return C.hasUniqueObjectRepresentations(T);
case UTT_IsTriviallyRelocatable:
return T.isTriviallyRelocatableType(C);
+ case UTT_IsBitwiseCloneable:
+ return T.isBitwiseCloneableType(C);
case UTT_IsReferenceable:
return T.isReferenceable();
case UTT_CanPassInRegs:
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 9ed3e8a0df02..ed8b226a6b39 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -8063,6 +8063,11 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
enum PathLifetimeKind {
/// Lifetime-extend along this path.
Extend,
+ /// We should lifetime-extend, but we don't because (due to technical
+ /// limitations) we can't. This happens for default member initializers,
+ /// which we don't clone for every use, so we don't have a unique
+ /// MaterializeTemporaryExpr to update.
+ ShouldExtend,
/// Do not lifetime extend along this path.
NoExtend
};
@@ -8074,7 +8079,7 @@ shouldLifetimeExtendThroughPath(const IndirectLocalPath &Path) {
PathLifetimeKind Kind = PathLifetimeKind::Extend;
for (auto Elem : Path) {
if (Elem.Kind == IndirectLocalPathEntry::DefaultInit)
- Kind = PathLifetimeKind::Extend;
+ Kind = PathLifetimeKind::ShouldExtend;
else if (Elem.Kind != IndirectLocalPathEntry::LambdaCaptureInit)
return PathLifetimeKind::NoExtend;
}
@@ -8194,6 +8199,18 @@ void Sema::checkInitializerLifetime(const InitializedEntity &Entity,
ExtendingEntity->allocateManglingNumber());
// Also visit the temporaries lifetime-extended by this initializer.
return true;
+
+ case PathLifetimeKind::ShouldExtend:
+ // We're supposed to lifetime-extend the temporary along this path (per
+ // the resolution of DR1815), but we don't support that yet.
+ //
+ // FIXME: Properly handle this situation. Perhaps the easiest approach
+ // would be to clone the initializer expression on each use that would
+ // lifetime extend its temporaries.
+ Diag(DiagLoc, diag::warn_unsupported_lifetime_extension)
+ << RK << DiagRange;
+ break;
+
case PathLifetimeKind::NoExtend:
// If the path goes through the initialization of a variable or field,
// it can't possibly reach a temporary created in this full-expression.
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 95dd356d48be..3bfda09d5f80 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -14172,13 +14172,6 @@ TreeTransform<Derived>::TransformCXXTemporaryObjectExpr(
if (TransformExprs(E->getArgs(), E->getNumArgs(), true, Args,
&ArgumentChanged))
return ExprError();
-
- if (E->isListInitialization() && !E->isStdInitListInitialization()) {
- ExprResult Res = RebuildInitList(E->getBeginLoc(), Args, E->getEndLoc());
- if (Res.isInvalid())
- return ExprError();
- Args = {Res.get()};
- }
}
if (!getDerived().AlwaysRebuild() &&
@@ -14190,9 +14183,12 @@ TreeTransform<Derived>::TransformCXXTemporaryObjectExpr(
return SemaRef.MaybeBindToTemporary(E);
}
+ // FIXME: We should just pass E->isListInitialization(), but we're not
+ // prepared to handle list-initialization without a child InitListExpr.
SourceLocation LParenLoc = T->getTypeLoc().getEndLoc();
return getDerived().RebuildCXXTemporaryObjectExpr(
- T, LParenLoc, Args, E->getEndLoc(), E->isListInitialization());
+ T, LParenLoc, Args, E->getEndLoc(),
+ /*ListInitialization=*/LParenLoc.isInvalid());
}
template<typename Derived>
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 290d96611d46..197d67310728 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1971,45 +1971,33 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
ExplodedNodeSet Tmp;
StmtNodeBuilder Bldr2(PreVisit, Tmp, *currBldrCtx);
- bool HasRewrittenInit = false;
- const Expr *ArgE = nullptr;
- if (const auto *DefE = dyn_cast<CXXDefaultArgExpr>(S)) {
+ const Expr *ArgE;
+ if (const auto *DefE = dyn_cast<CXXDefaultArgExpr>(S))
ArgE = DefE->getExpr();
- HasRewrittenInit = DefE->hasRewrittenInit();
- } else if (const auto *DefE = dyn_cast<CXXDefaultInitExpr>(S)) {
+ else if (const auto *DefE = dyn_cast<CXXDefaultInitExpr>(S))
ArgE = DefE->getExpr();
- HasRewrittenInit = DefE->hasRewrittenInit();
- } else
+ else
llvm_unreachable("unknown constant wrapper kind");
- if (HasRewrittenInit) {
- for (auto *N : PreVisit) {
- ProgramStateRef state = N->getState();
- const LocationContext *LCtx = N->getLocationContext();
- state = state->BindExpr(S, LCtx, state->getSVal(ArgE, LCtx));
- Bldr2.generateNode(S, N, state);
- }
- } else {
- // If it's not rewritten, the contents of these expressions are not
- // actually part of the current function, so we fall back to constant
- // evaluation.
- bool IsTemporary = false;
- if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(ArgE)) {
- ArgE = MTE->getSubExpr();
- IsTemporary = true;
- }
-
- std::optional<SVal> ConstantVal = svalBuilder.getConstantVal(ArgE);
- const LocationContext *LCtx = Pred->getLocationContext();
- for (auto *I : PreVisit) {
- ProgramStateRef State = I->getState();
- State = State->BindExpr(S, LCtx, ConstantVal.value_or(UnknownVal()));
- if (IsTemporary)
- State = createTemporaryRegionIfNeeded(State, LCtx, cast<Expr>(S),
- cast<Expr>(S));
+ bool IsTemporary = false;
+ if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(ArgE)) {
+ ArgE = MTE->getSubExpr();
+ IsTemporary = true;
+ }
- Bldr2.generateNode(S, I, State);
- }
+ std::optional<SVal> ConstantVal = svalBuilder.getConstantVal(ArgE);
+ if (!ConstantVal)
+ ConstantVal = UnknownVal();
+
+ const LocationContext *LCtx = Pred->getLocationContext();
+ for (const auto I : PreVisit) {
+ ProgramStateRef State = I->getState();
+ State = State->BindExpr(S, LCtx, *ConstantVal);
+ if (IsTemporary)
+ State = createTemporaryRegionIfNeeded(State, LCtx,
+ cast<Expr>(S),
+ cast<Expr>(S));
+ Bldr2.generateNode(S, I, State);
}
getCheckerManager().runCheckersForPostStmt(Dst, Tmp, S, *this);
diff --git a/clang/test/AST/Interp/arrays.cpp b/clang/test/AST/Interp/arrays.cpp
index dd5064d993e6..6f6fca8c1cfd 100644
--- a/clang/test/AST/Interp/arrays.cpp
+++ b/clang/test/AST/Interp/arrays.cpp
@@ -609,3 +609,17 @@ namespace ArrayMemberAccess {
bool cond = a->x;
}
}
+
+namespace OnePastEndSub {
+ struct A {};
+ constexpr A a[3][3];
+ constexpr int diff2 = &a[1][3] - &a[1][0]; /// Used to crash.
+}
+
+static int same_entity_2[3];
+constexpr int *get2() {
+ // This is a redeclaration of the same entity, even though it doesn't
+ // inherit the type of the prior declaration.
+ extern int same_entity_2[];
+ return same_entity_2;
+}
diff --git a/clang/test/AST/Interp/cxx23.cpp b/clang/test/AST/Interp/cxx23.cpp
index c91d52c552b1..1efd784abbbe 100644
--- a/clang/test/AST/Interp/cxx23.cpp
+++ b/clang/test/AST/Interp/cxx23.cpp
@@ -178,3 +178,25 @@ namespace ExplicitLambdaThis {
};
static_assert(f());
}
+
+namespace std {
+ struct strong_ordering {
+ int n;
+ constexpr operator int() const { return n; }
+ static const strong_ordering less, equal, greater;
+ };
+ constexpr strong_ordering strong_ordering::less = {-1};
+ constexpr strong_ordering strong_ordering::equal = {0};
+ constexpr strong_ordering strong_ordering::greater = {1};
+}
+
+namespace UndefinedThreeWay {
+ struct A {
+ friend constexpr std::strong_ordering operator<=>(const A&, const A&) = default; // all-note {{declared here}}
+ };
+
+ constexpr std::strong_ordering operator<=>(const A&, const A&) noexcept;
+ constexpr std::strong_ordering (*test_a_threeway)(const A&, const A&) = &operator<=>;
+ static_assert(!(*test_a_threeway)(A(), A())); // all-error {{static assertion expression is not an integral constant expression}} \
+ // all-note {{undefined function 'operator<=>' cannot be used in a constant expression}}
+}
diff --git a/clang/test/AST/Interp/eval-order.cpp b/clang/test/AST/Interp/eval-order.cpp
index aaf2b74510bb..7a7ce6a71460 100644
--- a/clang/test/AST/Interp/eval-order.cpp
+++ b/clang/test/AST/Interp/eval-order.cpp
@@ -71,8 +71,8 @@ namespace EvalOrder {
// Rules 1 and 2 have no effect ('b' is not an expression).
// Rule 3: a->*b
- // SEQ(A(ud).*B(&UserDefined::n)); FIXME
- // SEQ(A(&ud)->*B(&UserDefined::n)); FIXME
+ SEQ(A(ud).*B(&UserDefined::n));
+ SEQ(A(&ud)->*B(&UserDefined::n));
// Rule 4: a(b1, b2, b3)
SEQ(A(f)(B(1), B(2), B(3))); // expected-error {{not an integral constant expression}} FIXME \
diff --git a/clang/test/AST/Interp/literals.cpp b/clang/test/AST/Interp/literals.cpp
index c160be06dd24..5a29013a053a 100644
--- a/clang/test/AST/Interp/literals.cpp
+++ b/clang/test/AST/Interp/literals.cpp
@@ -66,7 +66,12 @@ namespace ScalarTypes {
First = 0,
};
static_assert(getScalar<E>() == First, "");
- /// FIXME: Member pointers.
+
+ struct S {
+ int v;
+ };
+ constexpr int S::* MemberPtr = &S::v;
+ static_assert(getScalar<decltype(MemberPtr)>() == nullptr, "");
#if __cplusplus >= 201402L
constexpr void Void(int n) {
@@ -1204,7 +1209,7 @@ namespace incdecbool {
constexpr int externvar1() { // both-error {{never produces a constant expression}}
extern char arr[]; // ref-note {{declared here}}
return arr[0]; // ref-note {{read of non-constexpr variable 'arr'}} \
- // expected-note {{array-to-pointer decay of array member without known bound is not supported}}
+ // expected-note {{indexing of array without known bound}}
}
#endif
diff --git a/clang/test/AST/Interp/memberpointers.cpp b/clang/test/AST/Interp/memberpointers.cpp
new file mode 100644
index 000000000000..54d73fe86ca1
--- /dev/null
+++ b/clang/test/AST/Interp/memberpointers.cpp
@@ -0,0 +1,197 @@
+// RUN: %clang_cc1 -std=c++14 -fexperimental-new-constant-interpreter -verify=expected,both %s
+// RUN: %clang_cc1 -std=c++14 -verify=ref,both %s
+
+namespace MemberPointers {
+ struct A {
+ constexpr A(int n) : n(n) {}
+ int n;
+ constexpr int f() const { return n + 3; }
+ };
+
+ constexpr A a(7);
+ static_assert(A(5).*&A::n == 5, "");
+ static_assert((&a)->*&A::n == 7, "");
+ static_assert((A(8).*&A::f)() == 11, "");
+ static_assert(((&a)->*&A::f)() == 10, "");
+
+ struct B : A {
+ constexpr B(int n, int m) : A(n), m(m) {}
+ int m;
+ constexpr int g() const { return n + m + 1; }
+ };
+ constexpr B b(9, 13);
+ static_assert(B(4, 11).*&A::n == 4, "");
+ static_assert(B(4, 11).*&B::m == 11, "");
+ static_assert(B(4, 11).m == 11, "");
+ static_assert(B(4, 11).*(int(A::*))&B::m == 11, "");
+ static_assert(B(4, 11).*&B::m == 11, "");
+ static_assert((&b)->*&A::n == 9, "");
+ static_assert((&b)->*&B::m == 13, "");
+ static_assert((&b)->*(int(A::*))&B::m == 13, "");
+ static_assert((B(4, 11).*&A::f)() == 7, "");
+ static_assert((B(4, 11).*&B::g)() == 16, "");
+
+ static_assert((B(4, 11).*(int(A::*)() const)&B::g)() == 16, "");
+
+ static_assert(((&b)->*&A::f)() == 12, "");
+ static_assert(((&b)->*&B::g)() == 23, "");
+ static_assert(((&b)->*(int(A::*)()const)&B::g)() == 23, "");
+
+
+ struct S {
+ constexpr S(int m, int n, int (S::*pf)() const, int S::*pn) :
+ m(m), n(n), pf(pf), pn(pn) {}
+ constexpr S() : m(), n(), pf(&S::f), pn(&S::n) {}
+
+ constexpr int f() const { return this->*pn; }
+ virtual int g() const;
+
+ int m, n;
+ int (S::*pf)() const;
+ int S::*pn;
+ };
+
+ constexpr int S::*pm = &S::m;
+ constexpr int S::*pn = &S::n;
+
+ constexpr int (S::*pf)() const = &S::f;
+ constexpr int (S::*pg)() const = &S::g;
+
+ constexpr S s(2, 5, &S::f, &S::m);
+
+ static_assert((s.*&S::f)() == 2, "");
+ static_assert((s.*s.pf)() == 2, "");
+
+ static_assert(pf == &S::f, "");
+
+ static_assert(pf == s.*&S::pf, "");
+
+ static_assert(pm == &S::m, "");
+ static_assert(pm != pn, "");
+ static_assert(s.pn != pn, "");
+ static_assert(s.pn == pm, "");
+ static_assert(pg != nullptr, "");
+ static_assert(pf != nullptr, "");
+ static_assert((int S::*)nullptr == nullptr, "");
+ static_assert(pg == pg, ""); // both-error {{constant expression}} \
+ // both-note {{comparison of pointer to virtual member function 'g' has unspecified value}}
+ static_assert(pf != pg, ""); // both-error {{constant expression}} \
+ // both-note {{comparison of pointer to virtual member function 'g' has unspecified value}}
+
+ template<int n> struct T : T<n-1> { const int X = n;};
+ template<> struct T<0> { int n; char k;};
+ template<> struct T<30> : T<29> { int m; };
+
+ T<17> t17;
+ T<30> t30;
+
+ constexpr int (T<15>::*deepm) = (int(T<10>::*))&T<30>::m;
+ constexpr int (T<10>::*deepn) = &T<0>::n;
+ constexpr char (T<10>::*deepk) = &T<0>::k;
+
+ static_assert(&(t17.*deepn) == &t17.n, "");
+ static_assert(&(t17.*deepk) == &t17.k, "");
+ static_assert(deepn == &T<2>::n, "");
+
+ constexpr int *pgood = &(t30.*deepm);
+ constexpr int *pbad = &(t17.*deepm); // both-error {{constant expression}}
+ static_assert(&(t30.*deepm) == &t30.m, "");
+
+ static_assert(deepm == &T<50>::m, "");
+ static_assert(deepm != deepn, "");
+
+ constexpr T<5> *p17_5 = &t17;
+ constexpr T<13> *p17_13 = (T<13>*)p17_5;
+ constexpr T<23> *p17_23 = (T<23>*)p17_13; // both-error {{constant expression}} \
+ // both-note {{cannot cast object of dynamic type 'T<17>' to type 'T<23>'}}
+ constexpr T<18> *p17_18 = (T<18>*)p17_13; // both-error {{constant expression}} \
+ // both-note {{cannot cast object of dynamic type 'T<17>' to type 'T<18>'}}
+ static_assert(&(p17_5->*(int(T<0>::*))deepn) == &t17.n, "");
+ static_assert(&(p17_5->*(int(T<0>::*))deepn), "");
+
+
+ static_assert(&(p17_13->*deepn) == &t17.n, "");
+ constexpr int *pbad2 = &(p17_13->*(int(T<9>::*))deepm); // both-error {{constant expression}}
+
+ constexpr T<5> *p30_5 = &t30;
+ constexpr T<23> *p30_23 = (T<23>*)p30_5;
+ constexpr T<13> *p30_13 = p30_23;
+ static_assert(&(p30_13->*deepn) == &t30.n, "");
+ static_assert(&(p30_23->*deepn) == &t30.n, "");
+ static_assert(&(p30_5->*(int(T<3>::*))deepn) == &t30.n, "");
+
+ static_assert(&(p30_5->*(int(T<2>::*))deepm) == &t30.m, "");
+ static_assert(&(((T<17>*)p30_13)->*deepm) == &t30.m, "");
+ static_assert(&(p30_23->*deepm) == &t30.m, "");
+
+
+ /// Added tests not from constant-expression-cxx11.cpp
+ static_assert(pm, "");
+ static_assert(!((int S::*)nullptr), "");
+ constexpr int S::*pk = nullptr;
+ static_assert(!pk, "");
+}
+
+namespace test3 {
+ struct nsCSSRect {
+ };
+ static int nsCSSRect::* sides;
+ nsCSSRect dimenX;
+ void ParseBoxCornerRadii(int y) {
+ switch (y) {
+ }
+ int& x = dimenX.*sides;
+ }
+}
+
+void foo() {
+ class X;
+ void (X::*d) ();
+ d = nullptr; /// This calls in the constant interpreter.
+}
+
+namespace {
+ struct A { int n; };
+ struct B { int n; };
+ struct C : A, B {};
+ struct D { double d; C c; };
+ const int &&u = static_cast<B&&>(0, ((D&&)D{}).*&D::c).n; // both-warning {{left operand of comma operator has no effect}}
+}
+
+/// From SemaTemplate/instantiate-member-pointers.cpp
+namespace {
+ struct Y {
+ int x;
+ };
+
+ template<typename T, typename Class, T Class::*Ptr>
+ struct X3 {
+ X3<T, Class, Ptr> &operator=(const T& value) {
+ return *this;
+ }
+ };
+
+ typedef int Y::*IntMember;
+ template<IntMember Member>
+ struct X4 {
+ X3<int, Y, Member> member;
+ int &getMember(Y& y) { return y.*Member; }
+ };
+
+ int &get_X4(X4<&Y::x> x4, Y& y) {
+ return x4.getMember(y);
+ }
+}
+
+/// From test/CXX/basic/basic.def.odr/p2.cpp
+namespace {
+ void use(int);
+ struct S { int x; int f() const; };
+ constexpr S *ps = nullptr;
+ S *const &psr = ps;
+
+ void test() {
+ use(ps->*&S::x);
+ use(psr->*&S::x);
+ }
+}
diff --git a/clang/test/AST/ast-dump-default-init-json.cpp b/clang/test/AST/ast-dump-default-init-json.cpp
index f4949a9c9eed..1058b4e3ea4d 100644
--- a/clang/test/AST/ast-dump-default-init-json.cpp
+++ b/clang/test/AST/ast-dump-default-init-json.cpp
@@ -789,10 +789,10 @@ void test() {
// CHECK-NEXT: "valueCategory": "lvalue",
// CHECK-NEXT: "extendingDecl": {
// CHECK-NEXT: "id": "0x{{.*}}",
-// CHECK-NEXT: "kind": "VarDecl",
-// CHECK-NEXT: "name": "b",
+// CHECK-NEXT: "kind": "FieldDecl",
+// CHECK-NEXT: "name": "a",
// CHECK-NEXT: "type": {
-// CHECK-NEXT: "qualType": "B"
+// CHECK-NEXT: "qualType": "const A &"
// CHECK-NEXT: }
// CHECK-NEXT: },
// CHECK-NEXT: "storageDuration": "automatic",
diff --git a/clang/test/AST/ast-dump-default-init.cpp b/clang/test/AST/ast-dump-default-init.cpp
index 26864fbf1542..15b29f04bf21 100644
--- a/clang/test/AST/ast-dump-default-init.cpp
+++ b/clang/test/AST/ast-dump-default-init.cpp
@@ -13,7 +13,7 @@ void test() {
}
// CHECK: -CXXDefaultInitExpr 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue has rewritten init
// CHECK-NEXT: `-ExprWithCleanups 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue
-// CHECK-NEXT: `-MaterializeTemporaryExpr 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue extended by Var 0x{{[^ ]*}} 'b' 'B'
+// CHECK-NEXT: `-MaterializeTemporaryExpr 0x{{[^ ]*}} <{{.*}}> 'const A' lvalue extended by Field 0x{{[^ ]*}} 'a' 'const A &'
// CHECK-NEXT: `-ImplicitCastExpr 0x{{[^ ]*}} <{{.*}}> 'const A' <NoOp>
// CHECK-NEXT: `-CXXFunctionalCastExpr 0x{{[^ ]*}} <{{.*}}> 'A' functional cast to A <NoOp>
// CHECK-NEXT: `-InitListExpr 0x{{[^ ]*}} <{{.*}}> 'A'
diff --git a/clang/test/Analysis/cxx-uninitialized-object.cpp b/clang/test/Analysis/cxx-uninitialized-object.cpp
index aee0dae15fbf..e3fa8ae8d7f2 100644
--- a/clang/test/Analysis/cxx-uninitialized-object.cpp
+++ b/clang/test/Analysis/cxx-uninitialized-object.cpp
@@ -1114,27 +1114,27 @@ void fCXX11MemberInitTest1() {
CXX11MemberInitTest1();
}
-#ifdef PEDANTIC
struct CXX11MemberInitTest2 {
struct RecordType {
- int a; // expected-note {{uninitialized field 'this->a'}}
- int b; // expected-note {{uninitialized field 'this->b'}}
+ // TODO: we'd expect the note: {{uninitialized field 'this->rec.a'}}
+ int a; // no-note
+ // TODO: we'd expect the note: {{uninitialized field 'this->rec.b'}}
+ int b; // no-note
RecordType(int) {}
};
- RecordType rec = RecordType(int()); // expected-warning {{2 uninitialized fields}}
+ RecordType rec = RecordType(int());
int dontGetFilteredByNonPedanticMode = 0;
CXX11MemberInitTest2() {}
};
void fCXX11MemberInitTest2() {
+ // TODO: we'd expect the warning: {{2 uninitializeds field}}
CXX11MemberInitTest2(); // no-warning
}
-#endif // PEDANTIC
-
//===----------------------------------------------------------------------===//
// "Esoteric" primitive type tests.
//===----------------------------------------------------------------------===//
diff --git a/clang/test/Analysis/lifetime-extended-regions.cpp b/clang/test/Analysis/lifetime-extended-regions.cpp
index 524f4e0c400d..4e98bd4b0403 100644
--- a/clang/test/Analysis/lifetime-extended-regions.cpp
+++ b/clang/test/Analysis/lifetime-extended-regions.cpp
@@ -120,11 +120,11 @@ void aggregateWithReferences() {
clang_analyzer_dump(viaReference); // expected-warning-re {{&lifetime_extended_object{RefAggregate, viaReference, S{{[0-9]+}}} }}
clang_analyzer_dump(viaReference.rx); // expected-warning-re {{&lifetime_extended_object{int, viaReference, S{{[0-9]+}}} }}
clang_analyzer_dump(viaReference.ry); // expected-warning-re {{&lifetime_extended_object{Composite, viaReference, S{{[0-9]+}}} }}
-
- // The lifetime lifetime of object bound to reference members of aggregates,
- // that are created from default member initializer was extended.
- RefAggregate defaultInitExtended{i};
- clang_analyzer_dump(defaultInitExtended.ry); // expected-warning-re {{&lifetime_extended_object{Composite, defaultInitExtended, S{{[0-9]+}}} }}
+
+ // clang does not currently implement extending lifetime of object bound to reference members of aggregates,
+ // that are created from default member initializer (see `warn_unsupported_lifetime_extension` from `-Wdangling`)
+ RefAggregate defaultInitExtended{i}; // clang-bug does not extend `Composite`
+ clang_analyzer_dump(defaultInitExtended.ry); // expected-warning {{Unknown }}
}
void lambda() {
diff --git a/clang/test/CXX/drs/cwg16xx.cpp b/clang/test/CXX/drs/cwg16xx.cpp
index 82ef871939d2..cf6b45ceabf2 100644
--- a/clang/test/CXX/drs/cwg16xx.cpp
+++ b/clang/test/CXX/drs/cwg16xx.cpp
@@ -483,6 +483,8 @@ namespace cwg1696 { // cwg1696: 7
const A &a = A(); // #cwg1696-D1-a
};
D1 d1 = {}; // #cwg1696-d1
+ // since-cxx14-warning@-1 {{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported; lifetime of temporary will end at the end of the full-expression}}
+ // since-cxx14-note@#cwg1696-D1-a {{initializing field 'a' with default member initializer}}
struct D2 {
const A &a = A(); // #cwg1696-D2-a
diff --git a/clang/test/CXX/drs/cwg18xx.cpp b/clang/test/CXX/drs/cwg18xx.cpp
index 054ce5a4f4b7..323e56f9c527 100644
--- a/clang/test/CXX/drs/cwg18xx.cpp
+++ b/clang/test/CXX/drs/cwg18xx.cpp
@@ -206,28 +206,19 @@ namespace cwg1814 { // cwg1814: yes
#endif
}
-namespace cwg1815 { // cwg1815: 19
+namespace cwg1815 { // cwg1815: no
#if __cplusplus >= 201402L
- struct A { int &&r = 0; };
+ // FIXME: needs codegen test
+ struct A { int &&r = 0; }; // #cwg1815-A
A a = {};
+ // since-cxx14-warning@-1 {{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported; lifetime of temporary will end at the end of the full-expression}} FIXME
+ // since-cxx14-note@#cwg1815-A {{initializing field 'r' with default member initializer}}
struct B { int &&r = 0; }; // #cwg1815-B
// since-cxx14-error@-1 {{reference member 'r' binds to a temporary object whose lifetime would be shorter than the lifetime of the constructed object}}
// since-cxx14-note@#cwg1815-B {{initializing field 'r' with default member initializer}}
// since-cxx14-note@#cwg1815-b {{in implicit default constructor for 'cwg1815::B' first required here}}
B b; // #cwg1815-b
-
-#if __cplusplus >= 201703L
- struct C { const int &r = 0; };
- constexpr C c = {}; // OK, since cwg1815
- static_assert(c.r == 0);
-
- constexpr int f() {
- A a = {}; // OK, since cwg1815
- return a.r;
- }
- static_assert(f() == 0);
-#endif
#endif
}
diff --git a/clang/test/CXX/special/class.temporary/p6.cpp b/clang/test/CXX/special/class.temporary/p6.cpp
index a6d2adfd1fd2..5554363cc69a 100644
--- a/clang/test/CXX/special/class.temporary/p6.cpp
+++ b/clang/test/CXX/special/class.temporary/p6.cpp
@@ -269,40 +269,6 @@ void init_capture_init_list() {
// CHECK: }
}
-void check_dr1815() { // dr1815: yes
-#if __cplusplus >= 201402L
-
- struct A {
- int &&r = 0;
- ~A() {}
- };
-
- struct B {
- A &&a = A{};
- ~B() {}
- };
- B a = {};
-
- // CHECK: call {{.*}}block_scope_begin_function
- extern void block_scope_begin_function();
- extern void block_scope_end_function();
- block_scope_begin_function();
- {
- // CHECK: call void @_ZZ12check_dr1815vEN1BD1Ev
- // CHECK: call void @_ZZ12check_dr1815vEN1AD1Ev
- B b = {};
- }
- // CHECK: call {{.*}}block_scope_end_function
- block_scope_end_function();
-
- // CHECK: call {{.*}}some_other_function
- extern void some_other_function();
- some_other_function();
- // CHECK: call void @_ZZ12check_dr1815vEN1BD1Ev
- // CHECK: call void @_ZZ12check_dr1815vEN1AD1Ev
-#endif
-}
-
namespace P2718R0 {
namespace basic {
template <typename E> using T2 = std::list<E>;
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcpopv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcpopv.c
index 13748be1acc1..b87b225b632a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcpopv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcpopv.c
@@ -16,399 +16,399 @@
#include <riscv_vector.h>
-// CHECK-LABEL: @test_vcpopv_v_u8mf8(
+// CHECK-LABEL: @test_vcpop_v_u8mf8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.nxv1i8.i64(<vscale x 1 x i8> poison, <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8(vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf8(vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8(vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf8(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4(
+// CHECK-LABEL: @test_vcpop_v_u8mf4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.nxv2i8.i64(<vscale x 2 x i8> poison, <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4(vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf4(vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4(vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf4(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2(
+// CHECK-LABEL: @test_vcpop_v_u8mf2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.nxv4i8.i64(<vscale x 4 x i8> poison, <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2(vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf2(vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2(vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1(
+// CHECK-LABEL: @test_vcpop_v_u8m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1(vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m1(vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1(vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m1(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2(
+// CHECK-LABEL: @test_vcpop_v_u8m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.nxv16i8.i64(<vscale x 16 x i8> poison, <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2(vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m2(vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2(vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4(
+// CHECK-LABEL: @test_vcpop_v_u8m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.nxv32i8.i64(<vscale x 32 x i8> poison, <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4(vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m4(vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4(vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m4(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8(
+// CHECK-LABEL: @test_vcpop_v_u8m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8(vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m8(vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8(vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m8(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4(
+// CHECK-LABEL: @test_vcpop_v_u16mf4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.nxv1i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4(vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf4(vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4(vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf4(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2(
+// CHECK-LABEL: @test_vcpop_v_u16mf2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.nxv2i16.i64(<vscale x 2 x i16> poison, <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2(vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf2(vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2(vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1(
+// CHECK-LABEL: @test_vcpop_v_u16m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.nxv4i16.i64(<vscale x 4 x i16> poison, <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1(vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m1(vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1(vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m1(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2(
+// CHECK-LABEL: @test_vcpop_v_u16m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.nxv8i16.i64(<vscale x 8 x i16> poison, <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2(vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m2(vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2(vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4(
+// CHECK-LABEL: @test_vcpop_v_u16m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.nxv16i16.i64(<vscale x 16 x i16> poison, <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4(vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m4(vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4(vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m4(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8(
+// CHECK-LABEL: @test_vcpop_v_u16m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.nxv32i16.i64(<vscale x 32 x i16> poison, <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8(vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m8(vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8(vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m8(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2(
+// CHECK-LABEL: @test_vcpop_v_u32mf2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.nxv1i32.i64(<vscale x 1 x i32> poison, <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2(vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32mf2(vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2(vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32mf2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1(
+// CHECK-LABEL: @test_vcpop_v_u32m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1(vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m1(vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1(vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m1(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2(
+// CHECK-LABEL: @test_vcpop_v_u32m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.nxv4i32.i64(<vscale x 4 x i32> poison, <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2(vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m2(vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2(vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4(
+// CHECK-LABEL: @test_vcpop_v_u32m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.nxv8i32.i64(<vscale x 8 x i32> poison, <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4(vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m4(vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4(vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m4(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8(
+// CHECK-LABEL: @test_vcpop_v_u32m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.nxv16i32.i64(<vscale x 16 x i32> poison, <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8(vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m8(vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8(vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m8(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1(
+// CHECK-LABEL: @test_vcpop_v_u64m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.nxv1i64.i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1(vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m1(vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1(vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m1(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2(
+// CHECK-LABEL: @test_vcpop_v_u64m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.nxv2i64.i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2(vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m2(vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2(vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m2(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4(
+// CHECK-LABEL: @test_vcpop_v_u64m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.nxv4i64.i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4(vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m4(vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4(vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m4(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8(
+// CHECK-LABEL: @test_vcpop_v_u64m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.nxv8i64.i64(<vscale x 8 x i64> poison, <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8(vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m8(vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8(vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m8(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_m(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> poison, <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_m(vbool64_t mask, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf8_m(mask, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_m(vbool64_t mask, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf8_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_m(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> poison, <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_m(vbool32_t mask, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf4_m(mask, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_m(vbool32_t mask, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf4_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_m(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> poison, <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_m(vbool16_t mask, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf2_m(mask, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_m(vbool16_t mask, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_m(
+// CHECK-LABEL: @test_vcpop_v_u8m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_m(vbool8_t mask, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m1_m(mask, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_m(vbool8_t mask, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m1_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_m(
+// CHECK-LABEL: @test_vcpop_v_u8m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> poison, <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_m(vbool4_t mask, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m2_m(mask, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_m(vbool4_t mask, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_m(
+// CHECK-LABEL: @test_vcpop_v_u8m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> poison, <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_m(vbool2_t mask, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m4_m(mask, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_m(vbool2_t mask, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m4_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_m(
+// CHECK-LABEL: @test_vcpop_v_u8m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_m(vbool1_t mask, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m8_m(mask, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_m(vbool1_t mask, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m8_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_m(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_m(vbool64_t mask, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf4_m(mask, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_m(vbool64_t mask, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf4_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_m(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> poison, <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_m(vbool32_t mask, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf2_m(mask, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_m(vbool32_t mask, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_m(
+// CHECK-LABEL: @test_vcpop_v_u16m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> poison, <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_m(vbool16_t mask, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m1_m(mask, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_m(vbool16_t mask, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m1_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_m(
+// CHECK-LABEL: @test_vcpop_v_u16m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> poison, <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_m(vbool8_t mask, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m2_m(mask, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_m(vbool8_t mask, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_m(
+// CHECK-LABEL: @test_vcpop_v_u16m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> poison, <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_m(vbool4_t mask, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m4_m(mask, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_m(vbool4_t mask, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m4_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_m(
+// CHECK-LABEL: @test_vcpop_v_u16m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> poison, <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_m(vbool2_t mask, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m8_m(mask, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_m(vbool2_t mask, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m8_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_m(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> poison, <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_m(vbool64_t mask, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32mf2_m(mask, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_m(vbool64_t mask, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32mf2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_m(
+// CHECK-LABEL: @test_vcpop_v_u32m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_m(vbool32_t mask, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m1_m(mask, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_m(vbool32_t mask, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m1_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_m(
+// CHECK-LABEL: @test_vcpop_v_u32m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> poison, <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_m(vbool16_t mask, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m2_m(mask, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_m(vbool16_t mask, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_m(
+// CHECK-LABEL: @test_vcpop_v_u32m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> poison, <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_m(vbool8_t mask, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m4_m(mask, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_m(vbool8_t mask, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m4_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_m(
+// CHECK-LABEL: @test_vcpop_v_u32m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> poison, <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_m(vbool4_t mask, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m8_m(mask, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_m(vbool4_t mask, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m8_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_m(
+// CHECK-LABEL: @test_vcpop_v_u64m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_m(vbool64_t mask, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m1_m(mask, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_m(vbool64_t mask, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m1_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_m(
+// CHECK-LABEL: @test_vcpop_v_u64m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_m(vbool32_t mask, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m2_m(mask, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_m(vbool32_t mask, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m2_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_m(
+// CHECK-LABEL: @test_vcpop_v_u64m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_m(vbool16_t mask, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m4_m(mask, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_m(vbool16_t mask, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m4_m(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_m(
+// CHECK-LABEL: @test_vcpop_v_u64m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> poison, <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_m(vbool8_t mask, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m8_m(mask, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_m(vbool8_t mask, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m8_m(mask, vs2, vl);
}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vcpopv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vcpopv.c
index adb0ac9ee5d7..5625b19f57f3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vcpopv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vcpopv.c
@@ -16,399 +16,399 @@
#include <riscv_vector.h>
-// CHECK-LABEL: @test_vcpopv_v_u8mf8(
+// CHECK-LABEL: @test_vcpop_v_u8mf8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.nxv1i8.i64(<vscale x 1 x i8> poison, <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8(vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8(vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4(
+// CHECK-LABEL: @test_vcpop_v_u8mf4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.nxv2i8.i64(<vscale x 2 x i8> poison, <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4(vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4(vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2(
+// CHECK-LABEL: @test_vcpop_v_u8mf2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.nxv4i8.i64(<vscale x 4 x i8> poison, <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2(vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2(vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1(
+// CHECK-LABEL: @test_vcpop_v_u8m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1(vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1(vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2(
+// CHECK-LABEL: @test_vcpop_v_u8m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.nxv16i8.i64(<vscale x 16 x i8> poison, <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2(vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2(vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4(
+// CHECK-LABEL: @test_vcpop_v_u8m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.nxv32i8.i64(<vscale x 32 x i8> poison, <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4(vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4(vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8(
+// CHECK-LABEL: @test_vcpop_v_u8m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8(vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8(vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4(
+// CHECK-LABEL: @test_vcpop_v_u16mf4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.nxv1i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4(vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4(vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2(
+// CHECK-LABEL: @test_vcpop_v_u16mf2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.nxv2i16.i64(<vscale x 2 x i16> poison, <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2(vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2(vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1(
+// CHECK-LABEL: @test_vcpop_v_u16m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.nxv4i16.i64(<vscale x 4 x i16> poison, <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1(vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1(vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2(
+// CHECK-LABEL: @test_vcpop_v_u16m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.nxv8i16.i64(<vscale x 8 x i16> poison, <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2(vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2(vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4(
+// CHECK-LABEL: @test_vcpop_v_u16m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.nxv16i16.i64(<vscale x 16 x i16> poison, <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4(vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4(vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8(
+// CHECK-LABEL: @test_vcpop_v_u16m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.nxv32i16.i64(<vscale x 32 x i16> poison, <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8(vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8(vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2(
+// CHECK-LABEL: @test_vcpop_v_u32mf2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.nxv1i32.i64(<vscale x 1 x i32> poison, <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2(vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2(vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1(
+// CHECK-LABEL: @test_vcpop_v_u32m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1(vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1(vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2(
+// CHECK-LABEL: @test_vcpop_v_u32m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.nxv4i32.i64(<vscale x 4 x i32> poison, <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2(vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2(vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4(
+// CHECK-LABEL: @test_vcpop_v_u32m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.nxv8i32.i64(<vscale x 8 x i32> poison, <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4(vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4(vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8(
+// CHECK-LABEL: @test_vcpop_v_u32m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.nxv16i32.i64(<vscale x 16 x i32> poison, <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8(vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8(vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1(
+// CHECK-LABEL: @test_vcpop_v_u64m1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.nxv1i64.i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1(vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1(vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2(
+// CHECK-LABEL: @test_vcpop_v_u64m2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.nxv2i64.i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2(vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2(vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4(
+// CHECK-LABEL: @test_vcpop_v_u64m4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.nxv4i64.i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4(vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4(vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8(
+// CHECK-LABEL: @test_vcpop_v_u64m8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.nxv8i64.i64(<vscale x 8 x i64> poison, <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8(vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv(vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8(vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop(vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_m(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> poison, <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_m(vbool64_t mask, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_m(vbool64_t mask, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_m(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> poison, <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_m(vbool32_t mask, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_m(vbool32_t mask, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_m(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> poison, <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_m(vbool16_t mask, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_m(vbool16_t mask, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_m(
+// CHECK-LABEL: @test_vcpop_v_u8m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_m(vbool8_t mask, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_m(vbool8_t mask, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_m(
+// CHECK-LABEL: @test_vcpop_v_u8m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> poison, <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_m(vbool4_t mask, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_m(vbool4_t mask, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_m(
+// CHECK-LABEL: @test_vcpop_v_u8m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> poison, <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_m(vbool2_t mask, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_m(vbool2_t mask, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_m(
+// CHECK-LABEL: @test_vcpop_v_u8m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_m(vbool1_t mask, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_m(vbool1_t mask, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_m(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_m(vbool64_t mask, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_m(vbool64_t mask, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_m(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> poison, <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_m(vbool32_t mask, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_m(vbool32_t mask, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_m(
+// CHECK-LABEL: @test_vcpop_v_u16m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> poison, <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_m(vbool16_t mask, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_m(vbool16_t mask, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_m(
+// CHECK-LABEL: @test_vcpop_v_u16m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> poison, <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_m(vbool8_t mask, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_m(vbool8_t mask, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_m(
+// CHECK-LABEL: @test_vcpop_v_u16m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> poison, <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_m(vbool4_t mask, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_m(vbool4_t mask, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_m(
+// CHECK-LABEL: @test_vcpop_v_u16m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> poison, <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_m(vbool2_t mask, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_m(vbool2_t mask, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_m(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> poison, <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_m(vbool64_t mask, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_m(vbool64_t mask, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_m(
+// CHECK-LABEL: @test_vcpop_v_u32m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_m(vbool32_t mask, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_m(vbool32_t mask, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_m(
+// CHECK-LABEL: @test_vcpop_v_u32m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> poison, <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_m(vbool16_t mask, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_m(vbool16_t mask, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_m(
+// CHECK-LABEL: @test_vcpop_v_u32m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> poison, <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_m(vbool8_t mask, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_m(vbool8_t mask, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_m(
+// CHECK-LABEL: @test_vcpop_v_u32m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> poison, <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_m(vbool4_t mask, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_m(vbool4_t mask, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_m(
+// CHECK-LABEL: @test_vcpop_v_u64m1_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_m(vbool64_t mask, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_m(vbool64_t mask, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_m(
+// CHECK-LABEL: @test_vcpop_v_u64m2_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_m(vbool32_t mask, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_m(vbool32_t mask, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_m(
+// CHECK-LABEL: @test_vcpop_v_u64m4_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> poison, <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_m(vbool16_t mask, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_m(vbool16_t mask, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_m(
+// CHECK-LABEL: @test_vcpop_v_u64m8_m(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> poison, <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_m(vbool8_t mask, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv(mask, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_m(vbool8_t mask, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop(mask, vs2, vl);
}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vcpopv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vcpopv.c
index 8a1f2e1beec1..3a110339b5f1 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vcpopv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vcpopv.c
@@ -16,795 +16,795 @@
#include <riscv_vector.h>
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_tu(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_tu(vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf8_tu(maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_tu(vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf8_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_tu(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_tu(vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf4_tu(maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_tu(vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf4_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_tu(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_tu(vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf2_tu(maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_tu(vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_tu(vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m1_tu(maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_tu(vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m1_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_tu(vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m2_tu(maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_tu(vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_tu(vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m4_tu(maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_tu(vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m4_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_tu(vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m8_tu(maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_tu(vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m8_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_tu(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_tu(vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf4_tu(maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_tu(vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf4_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_tu(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_tu(vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf2_tu(maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_tu(vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_tu(vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m1_tu(maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_tu(vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m1_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_tu(vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m2_tu(maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_tu(vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_tu(vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m4_tu(maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_tu(vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m4_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_tu(vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m8_tu(maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_tu(vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m8_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_tu(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_tu(vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32mf2_tu(maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_tu(vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32mf2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_tu(vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m1_tu(maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_tu(vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m1_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_tu(vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m2_tu(maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_tu(vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_tu(vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m4_tu(maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_tu(vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m4_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_tu(vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m8_tu(maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_tu(vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m8_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_tu(vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m1_tu(maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_tu(vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m1_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_tu(vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m2_tu(maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_tu(vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m2_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_tu(vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m4_tu(maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_tu(vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m4_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_tu(vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m8_tu(maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_tu(vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m8_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_tum(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_tum(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf8_tum(mask, maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_tum(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf8_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_tum(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_tum(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf4_tum(mask, maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_tum(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf4_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_tum(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_tum(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf2_tum(mask, maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_tum(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_tum(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m1_tum(mask, maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_tum(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m1_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_tum(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m2_tum(mask, maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_tum(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_tum(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m4_tum(mask, maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_tum(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m4_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_tum(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m8_tum(mask, maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_tum(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m8_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_tum(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_tum(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf4_tum(mask, maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_tum(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf4_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_tum(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_tum(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf2_tum(mask, maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_tum(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_tum(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m1_tum(mask, maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_tum(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m1_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_tum(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m2_tum(mask, maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_tum(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_tum(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m4_tum(mask, maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_tum(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m4_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_tum(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m8_tum(mask, maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_tum(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m8_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_tum(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_tum(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32mf2_tum(mask, maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_tum(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32mf2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_tum(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m1_tum(mask, maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_tum(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m1_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_tum(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m2_tum(mask, maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_tum(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_tum(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m4_tum(mask, maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_tum(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m4_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_tum(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m8_tum(mask, maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_tum(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m8_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_tum(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m1_tum(mask, maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_tum(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m1_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_tum(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m2_tum(mask, maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_tum(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m2_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_tum(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m4_tum(mask, maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_tum(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m4_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_tum(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m8_tum(mask, maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_tum(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m8_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_tumu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf8_tumu(mask, maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_tumu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf8_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_tumu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf4_tumu(mask, maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_tumu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf4_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_tumu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf2_tumu(mask, maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_tumu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_tumu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m1_tumu(mask, maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_tumu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m1_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_tumu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m2_tumu(mask, maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_tumu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_tumu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m4_tumu(mask, maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_tumu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m4_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_tumu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m8_tumu(mask, maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_tumu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m8_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf4_tumu(mask, maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf4_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_tumu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf2_tumu(mask, maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_tumu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_tumu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m1_tumu(mask, maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_tumu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m1_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_tumu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m2_tumu(mask, maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_tumu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_tumu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m4_tumu(mask, maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_tumu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m4_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_tumu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m8_tumu(mask, maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_tumu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m8_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_tumu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32mf2_tumu(mask, maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_tumu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32mf2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_tumu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m1_tumu(mask, maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_tumu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m1_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_tumu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m2_tumu(mask, maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_tumu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m4_tumu(mask, maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m4_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_tumu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m8_tumu(mask, maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_tumu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m8_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m1_tumu(mask, maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m1_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_tumu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m2_tumu(mask, maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_tumu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m2_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m4_tumu(mask, maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m4_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m8_tumu(mask, maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m8_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_mu(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_mu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf8_mu(mask, maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_mu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf8_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_mu(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_mu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf4_mu(mask, maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_mu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf4_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_mu(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_mu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8mf2_mu(mask, maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_mu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8mf2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_mu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m1_mu(mask, maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_mu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m1_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_mu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m2_mu(mask, maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_mu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_mu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m4_mu(mask, maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_mu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m4_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_mu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u8m8_mu(mask, maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_mu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u8m8_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_mu(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_mu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf4_mu(mask, maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_mu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf4_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_mu(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_mu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16mf2_mu(mask, maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_mu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16mf2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_mu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m1_mu(mask, maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_mu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m1_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_mu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m2_mu(mask, maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_mu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_mu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m4_mu(mask, maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_mu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m4_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_mu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u16m8_mu(mask, maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_mu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u16m8_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_mu(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_mu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32mf2_mu(mask, maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_mu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32mf2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_mu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m1_mu(mask, maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_mu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m1_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_mu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m2_mu(mask, maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_mu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_mu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m4_mu(mask, maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_mu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m4_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_mu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u32m8_mu(mask, maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_mu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u32m8_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_mu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m1_mu(mask, maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_mu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m1_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_mu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m2_mu(mask, maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_mu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m2_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_mu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m4_mu(mask, maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_mu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m4_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_mu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_v_u64m8_mu(mask, maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_mu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_v_u64m8_mu(mask, maskedoff, vs2, vl);
}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vcpopv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vcpopv.c
index 02a499d4b67d..953ccac133c3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vcpopv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vcpopv.c
@@ -16,795 +16,795 @@
#include <riscv_vector.h>
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_tu(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_tu(vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_tu(vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_tu(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_tu(vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_tu(vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_tu(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_tu(vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_tu(vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_tu(vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_tu(vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_tu(vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_tu(vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_tu(vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_tu(vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u8m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_tu(vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_tu(vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_tu(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_tu(vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_tu(vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_tu(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_tu(vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_tu(vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_tu(vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_tu(vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_tu(vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_tu(vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_tu(vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_tu(vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u16m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_tu(vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_tu(vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_tu(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_tu(vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_tu(vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_tu(vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_tu(vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_tu(vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_tu(vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_tu(vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_tu(vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u32m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_tu(vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_tu(vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m1_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_tu(vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_tu(vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m2_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_tu(vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_tu(vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m4_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_tu(vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_tu(vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_tu(
+// CHECK-LABEL: @test_vcpop_v_u64m8_tu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_tu(vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tu(maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_tu(vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tu(maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_tum(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_tum(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_tum(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_tum(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_tum(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_tum(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_tum(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_tum(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_tum(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_tum(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_tum(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_tum(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_tum(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_tum(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_tum(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u8m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_tum(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_tum(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_tum(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_tum(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_tum(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_tum(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_tum(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_tum(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_tum(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_tum(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_tum(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_tum(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_tum(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_tum(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u16m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_tum(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_tum(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_tum(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_tum(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_tum(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_tum(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_tum(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_tum(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_tum(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_tum(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_tum(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u32m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_tum(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_tum(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m1_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_tum(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_tum(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m2_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_tum(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_tum(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m4_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_tum(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_tum(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_tum(
+// CHECK-LABEL: @test_vcpop_v_u64m8_tum(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_tum(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tum(mask, maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_tum(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tum(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_tumu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_tumu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_tumu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_tumu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_tumu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_tumu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_tumu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_tumu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_tumu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_tumu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_tumu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_tumu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u8m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_tumu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_tumu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_tumu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_tumu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_tumu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_tumu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_tumu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_tumu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_tumu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_tumu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u16m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_tumu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_tumu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_tumu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_tumu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_tumu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_tumu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_tumu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_tumu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u32m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_tumu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_tumu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m1_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m2_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_tumu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_tumu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m4_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_tumu(
+// CHECK-LABEL: @test_vcpop_v_u64m8_tumu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_tumu(mask, maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_tumu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf8_mu(
+// CHECK-LABEL: @test_vcpop_v_u8mf8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vcpopv.mask.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i8> [[TMP0]]
//
-vuint8mf8_t test_vcpopv_v_u8mf8_mu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8mf8_t test_vcpop_v_u8mf8_mu(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf4_mu(
+// CHECK-LABEL: @test_vcpop_v_u8mf4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vcpopv.mask.nxv2i8.i64(<vscale x 2 x i8> [[MASKEDOFF:%.*]], <vscale x 2 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i8> [[TMP0]]
//
-vuint8mf4_t test_vcpopv_v_u8mf4_mu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8mf4_t test_vcpop_v_u8mf4_mu(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8mf2_mu(
+// CHECK-LABEL: @test_vcpop_v_u8mf2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vcpopv.mask.nxv4i8.i64(<vscale x 4 x i8> [[MASKEDOFF:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]]
//
-vuint8mf2_t test_vcpopv_v_u8mf2_mu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8mf2_t test_vcpop_v_u8mf2_mu(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vcpopv.mask.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i8> [[TMP0]]
//
-vuint8m1_t test_vcpopv_v_u8m1_mu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8m1_t test_vcpop_v_u8m1_mu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vcpopv.mask.nxv16i8.i64(<vscale x 16 x i8> [[MASKEDOFF:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-vuint8m2_t test_vcpopv_v_u8m2_mu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8m2_t test_vcpop_v_u8m2_mu(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vcpopv.mask.nxv32i8.i64(<vscale x 32 x i8> [[MASKEDOFF:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
//
-vuint8m4_t test_vcpopv_v_u8m4_mu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8m4_t test_vcpop_v_u8m4_mu(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u8m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u8m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vcpopv.mask.nxv64i8.i64(<vscale x 64 x i8> [[MASKEDOFF:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP0]]
//
-vuint8m8_t test_vcpopv_v_u8m8_mu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint8m8_t test_vcpop_v_u8m8_mu(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf4_mu(
+// CHECK-LABEL: @test_vcpop_v_u16mf4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vcpopv.mask.nxv1i16.i64(<vscale x 1 x i16> [[MASKEDOFF:%.*]], <vscale x 1 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i16> [[TMP0]]
//
-vuint16mf4_t test_vcpopv_v_u16mf4_mu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint16mf4_t test_vcpop_v_u16mf4_mu(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16mf2_mu(
+// CHECK-LABEL: @test_vcpop_v_u16mf2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vcpopv.mask.nxv2i16.i64(<vscale x 2 x i16> [[MASKEDOFF:%.*]], <vscale x 2 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i16> [[TMP0]]
//
-vuint16mf2_t test_vcpopv_v_u16mf2_mu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint16mf2_t test_vcpop_v_u16mf2_mu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vcpopv.mask.nxv4i16.i64(<vscale x 4 x i16> [[MASKEDOFF:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i16> [[TMP0]]
//
-vuint16m1_t test_vcpopv_v_u16m1_mu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint16m1_t test_vcpop_v_u16m1_mu(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vcpopv.mask.nxv8i16.i64(<vscale x 8 x i16> [[MASKEDOFF:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-vuint16m2_t test_vcpopv_v_u16m2_mu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint16m2_t test_vcpop_v_u16m2_mu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vcpopv.mask.nxv16i16.i64(<vscale x 16 x i16> [[MASKEDOFF:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
//
-vuint16m4_t test_vcpopv_v_u16m4_mu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint16m4_t test_vcpop_v_u16m4_mu(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u16m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u16m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vcpopv.mask.nxv32i16.i64(<vscale x 32 x i16> [[MASKEDOFF:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP0]]
//
-vuint16m8_t test_vcpopv_v_u16m8_mu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint16m8_t test_vcpop_v_u16m8_mu(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32mf2_mu(
+// CHECK-LABEL: @test_vcpop_v_u32mf2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vcpopv.mask.nxv1i32.i64(<vscale x 1 x i32> [[MASKEDOFF:%.*]], <vscale x 1 x i32> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
-vuint32mf2_t test_vcpopv_v_u32mf2_mu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint32mf2_t test_vcpop_v_u32mf2_mu(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vcpopv.mask.nxv2i32.i64(<vscale x 2 x i32> [[MASKEDOFF:%.*]], <vscale x 2 x i32> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
-vuint32m1_t test_vcpopv_v_u32m1_mu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint32m1_t test_vcpop_v_u32m1_mu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vcpopv.mask.nxv4i32.i64(<vscale x 4 x i32> [[MASKEDOFF:%.*]], <vscale x 4 x i32> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-vuint32m2_t test_vcpopv_v_u32m2_mu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint32m2_t test_vcpop_v_u32m2_mu(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vcpopv.mask.nxv8i32.i64(<vscale x 8 x i32> [[MASKEDOFF:%.*]], <vscale x 8 x i32> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
-vuint32m4_t test_vcpopv_v_u32m4_mu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint32m4_t test_vcpop_v_u32m4_mu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u32m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u32m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vcpopv.mask.nxv16i32.i64(<vscale x 16 x i32> [[MASKEDOFF:%.*]], <vscale x 16 x i32> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
-vuint32m8_t test_vcpopv_v_u32m8_mu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint32m8_t test_vcpop_v_u32m8_mu(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m1_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m1_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vcpopv.mask.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
-vuint64m1_t test_vcpopv_v_u64m1_mu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint64m1_t test_vcpop_v_u64m1_mu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m2_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m2_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vcpopv.mask.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-vuint64m2_t test_vcpopv_v_u64m2_mu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint64m2_t test_vcpop_v_u64m2_mu(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m4_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m4_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vcpopv.mask.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
-vuint64m4_t test_vcpopv_v_u64m4_mu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint64m4_t test_vcpop_v_u64m4_mu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
-// CHECK-LABEL: @test_vcpopv_v_u64m8_mu(
+// CHECK-LABEL: @test_vcpop_v_u64m8_mu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vcpopv.mask.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
-vuint64m8_t test_vcpopv_v_u64m8_mu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
- return __riscv_vcpopv_mu(mask, maskedoff, vs2, vl);
+vuint64m8_t test_vcpop_v_u64m8_mu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, size_t vl) {
+ return __riscv_vcpop_mu(mask, maskedoff, vs2, vl);
}
diff --git a/clang/test/CodeGen/voidptr-vaarg.c b/clang/test/CodeGen/voidptr-vaarg.c
new file mode 100644
index 000000000000..d023ddf0fb5d
--- /dev/null
+++ b/clang/test/CodeGen/voidptr-vaarg.c
@@ -0,0 +1,478 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+// Multiple targets use emitVoidPtrVAArg to lower va_arg instructions in clang
+// PPC is complicated, excluding from this case analysis
+// ForceRightAdjust is false for all non-PPC targets
+// AllowHigherAlign is only false for two Microsoft targets, both of which
+// pass most things by reference.
+//
+// Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
+// QualType ValueTy, bool IsIndirect,
+// TypeInfoChars ValueInfo, CharUnits SlotSizeAndAlign,
+// bool AllowHigherAlign, bool ForceRightAdjust =
+// false);
+//
+// Target IsIndirect SlotSize AllowHigher ForceRightAdjust
+// ARC false four true false
+// ARM varies four true false
+// Mips false 4 or 8 true false
+// RISCV varies register true false
+// PPC elided
+// LoongArch varies register true false
+// NVPTX WIP
+// AMDGPU WIP
+// X86_32 false four true false
+// X86_64 MS varies eight false false
+// CSKY false four true false
+// Webassembly varies four true false
+// AArch64 false eight true false
+// AArch64 MS false eight false false
+//
+// Webassembly passes indirectly iff it's an aggregate of multiple values
+// Choosing this as a representative architecture to check IR generation
+// partly because it has a relatively simple variadic calling convention.
+
+// Int, by itself and packed in structs
+// CHECK-LABEL: @raw_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int raw_int(__builtin_va_list list) { return __builtin_va_arg(list, int); }
+
+typedef struct {
+ int x;
+} one_int_t;
+
+// CHECK-LABEL: @one_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_INT_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_INT_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+one_int_t one_int(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_int_t);
+}
+
+typedef struct {
+ int x;
+ int y;
+} two_int_t;
+
+// CHECK-LABEL: @two_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_RESULT:%.*]], ptr align 4 [[TMP0]], i32 8, i1 false)
+// CHECK-NEXT: ret void
+//
+two_int_t two_int(__builtin_va_list list) {
+ return __builtin_va_arg(list, two_int_t);
+}
+
+// Double, by itself and packed in structs
+// CHECK-LABEL: @raw_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-NEXT: ret double [[TMP1]]
+//
+double raw_double(__builtin_va_list list) {
+ return __builtin_va_arg(list, double);
+}
+
+typedef struct {
+ double x;
+} one_double_t;
+
+// CHECK-LABEL: @one_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_DOUBLE_T:%.*]], align 8
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[RETVAL]], ptr align 8 [[ARGP_CUR_ALIGNED]], i32 8, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_DOUBLE_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT: ret double [[TMP1]]
+//
+one_double_t one_double(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_double_t);
+}
+
+typedef struct {
+ double x;
+ double y;
+} two_double_t;
+
+// CHECK-LABEL: @two_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[AGG_RESULT:%.*]], ptr align 8 [[TMP0]], i32 16, i1 false)
+// CHECK-NEXT: ret void
+//
+two_double_t two_double(__builtin_va_list list) {
+ return __builtin_va_arg(list, two_double_t);
+}
+
+// Scalar smaller than the slot size (C would promote a short to int)
+typedef struct {
+ char x;
+} one_char_t;
+
+// CHECK-LABEL: @one_char(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_CHAR_T:%.*]], align 1
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 1, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_CHAR_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[COERCE_DIVE]], align 1
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+one_char_t one_char(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_char_t);
+}
+
+typedef struct {
+ short x;
+} one_short_t;
+
+// CHECK-LABEL: @one_short(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_SHORT_T:%.*]], align 2
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 2, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_SHORT_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[COERCE_DIVE]], align 2
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+one_short_t one_short(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_short_t);
+}
+
+// Composite smaller than the slot size
+typedef struct {
+ _Alignas(2) char x;
+ char y;
+} char_pair_t;
+
+// CHECK-LABEL: @char_pair(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[AGG_RESULT:%.*]], ptr align 2 [[TMP0]], i32 2, i1 false)
+// CHECK-NEXT: ret void
+//
+char_pair_t char_pair(__builtin_va_list list) {
+ return __builtin_va_arg(list, char_pair_t);
+}
+
+// Empty struct
+typedef struct {
+} empty_t;
+
+// CHECK-LABEL: @empty(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY_T:%.*]], align 1
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 0
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 0, i1 false)
+// CHECK-NEXT: ret void
+//
+empty_t empty(__builtin_va_list list) {
+ return __builtin_va_arg(list, empty_t);
+}
+
+typedef struct {
+ empty_t x;
+ int y;
+} empty_int_t;
+
+// CHECK-LABEL: @empty_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY_INT_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+empty_int_t empty_int(__builtin_va_list list) {
+ return __builtin_va_arg(list, empty_int_t);
+}
+
+typedef struct {
+ int x;
+ empty_t y;
+} int_empty_t;
+
+// CHECK-LABEL: @int_empty(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT_EMPTY_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT_EMPTY_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int_empty_t int_empty(__builtin_va_list list) {
+ return __builtin_va_arg(list, int_empty_t);
+}
+
+// Need multiple va_arg instructions to check the postincrement
+// Using types that are passed directly as the indirect handling
+// is independent of the alignment handling in emitVoidPtrDirectVAArg.
+
+// CHECK-LABEL: @multiple_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP1]], align 4
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGP_CUR3]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
+// CHECK-NEXT: ret void
+//
+void multiple_int(__builtin_va_list list, int *out0, int *out1, int *out2) {
+ *out0 = __builtin_va_arg(list, int);
+ *out1 = __builtin_va_arg(list, int);
+ *out2 = __builtin_va_arg(list, int);
+}
+
+// Scalars in structs are an easy way of specifying alignment from C
+// CHECK-LABEL: @increasing_alignment(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT3_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT3:%.*]], ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP0]], ptr align 4 [[ARGP_CUR]], i32 1, i1 false)
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP1]], ptr align 4 [[ARGP_CUR1]], i32 2, i1 false)
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR3]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 7
+// CHECK-NEXT: [[ARGP_CUR5_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP4]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARGP_CUR5_ALIGNED]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: store double [[TMP5]], ptr [[TMP6]], align 8
+// CHECK-NEXT: ret void
+//
+void increasing_alignment(__builtin_va_list list, one_char_t *out0,
+ one_short_t *out1, int *out2, double *out3) {
+ *out0 = __builtin_va_arg(list, one_char_t);
+ *out1 = __builtin_va_arg(list, one_short_t);
+ *out2 = __builtin_va_arg(list, int);
+ *out3 = __builtin_va_arg(list, double);
+}
+
+// CHECK-LABEL: @decreasing_alignment(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT3_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT3:%.*]], ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store double [[TMP1]], ptr [[TMP2]], align 8
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP5]], ptr align 4 [[ARGP_CUR3]], i32 2, i1 false)
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP6]], ptr align 4 [[ARGP_CUR5]], i32 1, i1 false)
+// CHECK-NEXT: ret void
+//
+void decreasing_alignment(__builtin_va_list list, double *out0, int *out1,
+ one_short_t *out2, one_char_t *out3) {
+ *out0 = __builtin_va_arg(list, double);
+ *out1 = __builtin_va_arg(list, int);
+ *out2 = __builtin_va_arg(list, one_short_t);
+ *out3 = __builtin_va_arg(list, one_char_t);
+}
+
+// Typical edge cases, none hit special handling in VAArg lowering.
+typedef struct {
+ int x[16];
+ double y[8];
+} large_value_t;
+
+// CHECK-LABEL: @large_value(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[TMP0]], ptr align 8 [[TMP1]], i32 128, i1 false)
+// CHECK-NEXT: ret void
+//
+void large_value(__builtin_va_list list, large_value_t *out) {
+ *out = __builtin_va_arg(list, large_value_t);
+}
+
+typedef int v128_t __attribute__((__vector_size__(16), __aligned__(16)));
+// CHECK-LABEL: @vector(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 15
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -16)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 16
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARGP_CUR_ALIGNED]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 16
+// CHECK-NEXT: ret void
+//
+void vector(__builtin_va_list list, v128_t *out) {
+ *out = __builtin_va_arg(list, v128_t);
+}
+
+typedef struct BF {
+ float not_an_i32[2];
+ int A : 1;
+ char B;
+ int C : 13;
+} BF;
+
+// CHECK-LABEL: @bitfield(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[TMP1]], i32 12, i1 false)
+// CHECK-NEXT: ret void
+//
+void bitfield(__builtin_va_list list, BF *out) {
+ *out = __builtin_va_arg(list, BF);
+}
diff --git a/clang/test/CodeGenCUDA/cuda-builtin-vars.cu b/clang/test/CodeGenCUDA/cuda-builtin-vars.cu
index ba5e5f13ebe7..7880a8036f8c 100644
--- a/clang/test/CodeGenCUDA/cuda-builtin-vars.cu
+++ b/clang/test/CodeGenCUDA/cuda-builtin-vars.cu
@@ -6,21 +6,21 @@
__attribute__((global))
void kernel(int *out) {
int i = 0;
- out[i++] = threadIdx.x; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- out[i++] = threadIdx.y; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.tid.y()
- out[i++] = threadIdx.z; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+ out[i++] = threadIdx.x; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ out[i++] = threadIdx.y; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+ out[i++] = threadIdx.z; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.z()
- out[i++] = blockIdx.x; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
- out[i++] = blockIdx.y; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
- out[i++] = blockIdx.z; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+ out[i++] = blockIdx.x; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+ out[i++] = blockIdx.y; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+ out[i++] = blockIdx.z; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
- out[i++] = blockDim.x; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
- out[i++] = blockDim.y; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
- out[i++] = blockDim.z; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+ out[i++] = blockDim.x; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ out[i++] = blockDim.y; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+ out[i++] = blockDim.z; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
- out[i++] = gridDim.x; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
- out[i++] = gridDim.y; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
- out[i++] = gridDim.z; // CHECK: call noundef i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+ out[i++] = gridDim.x; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+ out[i++] = gridDim.y; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+ out[i++] = gridDim.z; // CHECK: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
out[i++] = warpSize; // CHECK: store i32 32,
diff --git a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
new file mode 100644
index 000000000000..a0673b96626d
--- /dev/null
+++ b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
@@ -0,0 +1,181 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
+// REQUIRES: webassembly-registered-target
+
+// Simple calls to known variadic functions that are completely elided when
+// optimisations are on. This is a functional check that the expand-variadics
+// pass is consistent with clang's va_arg handling.
+
+// When expand-variadics is added to the default pipeline, clang -O1 will
+// suffice here. -Wno-varargs avoids the warning that the second argument to
+// 'va_start' is not the last named parameter.
+
+// RUN: %clang_cc1 %s -triple wasm32-unknown-unknown -Wno-varargs -O1 -emit-llvm -o - | opt - -S --passes='module(expand-variadics,default<O1>)' --expand-variadics-override=optimize -o - | FileCheck %s
+
+#include <stdarg.h>
+#include <stdint.h>
+
+template <typename X, typename Y> static X first(...) {
+ va_list va;
+ __builtin_va_start(va, 0);
+ X r = va_arg(va, X);
+ va_end(va);
+ return r;
+}
+
+template <typename X, typename Y> static Y second(...) {
+ va_list va;
+ __builtin_va_start(va, 0);
+ va_arg(va, X);
+ Y r = va_arg(va, Y);
+ va_end(va);
+ return r;
+}
+
+extern "C" {
+
+// CHECK-LABEL: define {{[^@]+}}@first_pair_i32
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_pair_i32(int x, int y) { return first<int, int>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_pair_i32
+// CHECK-SAME: (i32 noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_pair_i32(int x, int y) { return second<int, int>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@first_pair_f64
+// CHECK-SAME: (double noundef returned [[X:%.*]], double noundef [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret double [[X]]
+//
+double first_pair_f64(double x, double y) {
+ return first<double, double>(x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@second_pair_f64
+// CHECK-SAME: (double noundef [[X:%.*]], double noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret double [[Y]]
+//
+double second_pair_f64(double x, double y) {
+ return second<double, double>(x, y);
+}
+}
+
+extern "C" {
+
+// CHECK-LABEL: define {{[^@]+}}@first_i32_f64
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], double noundef [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_i32_f64(int x, double y) { return first<int, double>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_i32_f64
+// CHECK-SAME: (i32 noundef [[X:%.*]], double noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret double [[Y]]
+//
+double second_i32_f64(int x, double y) { return second<int, double>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@first_f64_i32
+// CHECK-SAME: (double noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret double [[X]]
+//
+double first_f64_i32(double x, int y) { return first<double, int>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_f64_i32
+// CHECK-SAME: (double noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_f64_i32(double x, int y) { return second<double, int>(x, y); }
+}
+
+extern "C" {
+typedef uint64_t ulong2 __attribute__((__vector_size__(16), __aligned__(16)));
+
+// CHECK-LABEL: define {{[^@]+}}@first_i32_ulong2
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_i32_ulong2(int x, ulong2 *y) { return first<int, ulong2>(x, *y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_i32_ulong2
+// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+void second_i32_ulong2(int x, ulong2 *y, ulong2 *r) {
+ *r = second<int, ulong2>(x, *y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@first_ulong2_i32
+// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+void first_ulong2_i32(ulong2 *x, int y, ulong2 *r) {
+ *r = first<ulong2, int>(*x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@second_ulong2_i32
+// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_ulong2_i32(ulong2 *x, int y) { return second<ulong2, int>(*x, y); }
+}
+
+// ascending alignment
+typedef struct {
+ char c;
+ short s;
+ int i;
+ long l;
+ float f;
+ double d;
+} asc;
+
+extern "C" {
+
+// CHECK-LABEL: define {{[^@]+}}@first_i32_asc
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_i32_asc(int x, asc *y) { return first<int, asc>(x, *y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_i32_asc
+// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[Y]], i32 24, i1 false)
+// CHECK-NEXT: ret void
+//
+void second_i32_asc(int x, asc *y, asc *r) { *r = second<int, asc>(x, *y); }
+
+// CHECK-LABEL: define {{[^@]+}}@first_asc_i32
+// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[X]], i32 24, i1 false)
+// CHECK-NEXT: ret void
+//
+void first_asc_i32(asc *x, int y, asc *r) { *r = first<asc, int>(*x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_asc_i32
+// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_asc_i32(asc *x, int y) { return second<asc, int>(*x, y); }
+}
diff --git a/clang/test/CodeGenCXX/pointers-to-data-members.cpp b/clang/test/CodeGenCXX/pointers-to-data-members.cpp
index 29f1c3f48e3a..cf1d6c018409 100644
--- a/clang/test/CodeGenCXX/pointers-to-data-members.cpp
+++ b/clang/test/CodeGenCXX/pointers-to-data-members.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 %s -emit-llvm -o %t.ll -triple=x86_64-apple-darwin10
+// RUN: %clang_cc1 %s -emit-llvm -o %t.ll -triple=x86_64-apple-darwin10 -fexperimental-new-constant-interpreter
// RUN: FileCheck %s < %t.ll
// RUN: FileCheck -check-prefix=CHECK-GLOBAL %s < %t.ll
diff --git a/clang/test/CodeGenCXX/template-param-objects-linkage.cpp b/clang/test/CodeGenCXX/template-param-objects-linkage.cpp
index 63e7d8c64686..9c148ed83753 100644
--- a/clang/test/CodeGenCXX/template-param-objects-linkage.cpp
+++ b/clang/test/CodeGenCXX/template-param-objects-linkage.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 %s -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s
struct S { char buf[32]; };
template<S s> constexpr const char* f() { return s.buf; }
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 2fda52dcd2dc..854ab39791f1 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -49,6 +49,7 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1150 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1151 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1152 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1200 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1201 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1201 %s
@@ -100,6 +101,7 @@
// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
index d17ff81e5d43..66061786cca6 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
@@ -5,6 +5,7 @@
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck %s
typedef unsigned int uint;
typedef unsigned long ulong;
diff --git a/clang/test/Driver/aarch64-oryon-1.c b/clang/test/Driver/aarch64-oryon-1.c
new file mode 100644
index 000000000000..952ba5df74ba
--- /dev/null
+++ b/clang/test/Driver/aarch64-oryon-1.c
@@ -0,0 +1,19 @@
+// RUN: %clang -target aarch64 -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix %s
+// RUN: %clang -target aarch64 -mlittle-endian -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix %s
+// RUN: %clang -target aarch64 -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix-TUNE %s
+// RUN: %clang -target aarch64 -mlittle-endian -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix-TUNE %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix-TUNE %s
+// Phoenix: "-cc1"{{.*}} "-triple" "aarch64{{(--)?}}"{{.*}} "-target-cpu" "oryon-1" "-target-feature" "+v8.6a"
+// Phoenix-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{(--)?}}"{{.*}} "-target-cpu" "generic"
+
+// RUN: %clang -target arm64 -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-Phoenix %s
+// RUN: %clang -target arm64 -mlittle-endian -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-Phoenix %s
+// RUN: %clang -target arm64 -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-Phoenix-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-Phoenix-TUNE %s
+// ARM64-Phoenix: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "oryon-1" "-target-feature" "+v8.6a"
+// ARM64-Phoenix-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
+
+// RUN: %clang -target aarch64 -mcpu=oryon-1 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE-Phoenix %s
+// RUN: %clang -target aarch64 -mtune=cortex-a53 -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE-Phoenix %s
+// MCPU-MTUNE-Phoenix: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "oryon-1"
diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl
index a878a7decee9..3e4a570671ba 100644
--- a/clang/test/Driver/amdgpu-macros.cl
+++ b/clang/test/Driver/amdgpu-macros.cl
@@ -127,6 +127,7 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1103 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1103 -DFAMILY=GFX11
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1150 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1150 -DFAMILY=GFX11
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1151 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1151 -DFAMILY=GFX11
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1152 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1152 -DFAMILY=GFX11
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1200 -DFAMILY=GFX12
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12
diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl
index 5b6a22016f04..4b0ef92b682a 100644
--- a/clang/test/Driver/amdgpu-mcpu.cl
+++ b/clang/test/Driver/amdgpu-mcpu.cl
@@ -112,6 +112,7 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx1103 %s 2>&1 | FileCheck --check-prefix=GFX1103 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1150 %s 2>&1 | FileCheck --check-prefix=GFX1150 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1151 %s 2>&1 | FileCheck --check-prefix=GFX1151 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx1152 %s 2>&1 | FileCheck --check-prefix=GFX1152 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s
@@ -164,6 +165,7 @@
// GFX1103: "-target-cpu" "gfx1103"
// GFX1150: "-target-cpu" "gfx1150"
// GFX1151: "-target-cpu" "gfx1151"
+// GFX1152: "-target-cpu" "gfx1152"
// GFX1200: "-target-cpu" "gfx1200"
// GFX1201: "-target-cpu" "gfx1201"
diff --git a/clang/test/Interpreter/pretty-print.c b/clang/test/Interpreter/pretty-print.c
new file mode 100644
index 000000000000..d21749a649e1
--- /dev/null
+++ b/clang/test/Interpreter/pretty-print.c
@@ -0,0 +1,11 @@
+// REQUIRES: host-supports-jit
+// UNSUPPORTED: system-aix
+// RUN: cat %s | clang-repl -Xcc -xc | FileCheck %s
+// RUN: cat %s | clang-repl -Xcc -std=c++11 | FileCheck %s
+
+// Fails with `Symbols not found: [ __clang_Interpreter_SetValueNoAlloc ]`.
+// UNSUPPORTED: hwasan
+
+const char* c_str = "Hello, world!"; c_str
+
+// CHECK: Not implement yet.
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index bad1374b9397..cb5b6752850c 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -5,11 +5,11 @@
// RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64
// AARCH64: error: unknown target CPU 'not-a-cpu'
-// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, cobalt-100, grace{{$}}
+// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}}
// RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64
// TUNE_AARCH64: error: unknown target CPU 'not-a-cpu'
-// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, cobalt-100, grace{{$}}
+// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}}
// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
// X86: error: unknown target CPU 'not-a-cpu'
@@ -29,7 +29,7 @@
// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
-// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx12-generic, gfx1200, gfx1201{{$}}
+// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx12-generic, gfx1200, gfx1201{{$}}
// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
// R600: error: unknown target CPU 'not-a-cpu'
@@ -37,7 +37,7 @@
// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
// AMDGCN: error: unknown target CPU 'not-a-cpu'
-// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201, gfx9-generic, gfx10-1-generic, gfx10-3-generic, gfx11-generic, gfx12-generic{{$}}
+// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx1200, gfx1201, gfx9-generic, gfx10-1-generic, gfx10-3-generic, gfx11-generic, gfx12-generic{{$}}
// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
// WEBASM: error: unknown target CPU 'not-a-cpu'
diff --git a/clang/test/SemaCXX/attr-weak.cpp b/clang/test/SemaCXX/attr-weak.cpp
index f065bfd9483f..0f9a2975e5f6 100644
--- a/clang/test/SemaCXX/attr-weak.cpp
+++ b/clang/test/SemaCXX/attr-weak.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only -verify -std=c++11 %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only -verify -std=c++11 %s -fexperimental-new-constant-interpreter
static int test0 __attribute__((weak)); // expected-error {{weak declaration cannot have internal linkage}}
static void test1() __attribute__((weak)); // expected-error {{weak declaration cannot have internal linkage}}
diff --git a/clang/test/SemaCXX/builtin-is-bitwise-cloneable-fsanitize.cpp b/clang/test/SemaCXX/builtin-is-bitwise-cloneable-fsanitize.cpp
new file mode 100644
index 000000000000..d47a39a0754c
--- /dev/null
+++ b/clang/test/SemaCXX/builtin-is-bitwise-cloneable-fsanitize.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -DSANITIZER_ENABLED -fsanitize=address -fsanitize-address-field-padding=1 %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux %s
+
+struct S {
+ ~S() {}
+ virtual void foo() {}
+
+ int buffer[1];
+ int other_field = 0;
+};
+
+union U {
+ S s;
+};
+
+struct Derived : S {};
+
+static_assert(!__is_trivially_copyable(S));
+#ifdef SANITIZER_ENABLED
+// Don't allow memcpy when the struct has poisoned padding bits.
+// The sanitizer adds poison padding bits to struct S.
+static_assert(sizeof(S) > 16);
+static_assert(!__is_bitwise_cloneable(S));
+static_assert(sizeof(U) == sizeof(S)); // no padding bit for U.
+static_assert(!__is_bitwise_cloneable(U));
+static_assert(!__is_bitwise_cloneable(S[2]));
+static_assert(!__is_bitwise_cloneable(Derived));
+#else
+static_assert(sizeof(S) == 16);
+static_assert(__is_bitwise_cloneable(S));
+static_assert(__is_bitwise_cloneable(U));
+static_assert(__is_bitwise_cloneable(S[2]));
+static_assert(__is_bitwise_cloneable(Derived));
+#endif
diff --git a/clang/test/SemaCXX/builtin-is-bitwise-cloneable.cpp b/clang/test/SemaCXX/builtin-is-bitwise-cloneable.cpp
new file mode 100644
index 000000000000..1781cf48449f
--- /dev/null
+++ b/clang/test/SemaCXX/builtin-is-bitwise-cloneable.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+//
+struct DynamicClass { virtual int Foo(); };
+static_assert(!__is_trivially_copyable(DynamicClass));
+static_assert(__is_bitwise_cloneable(DynamicClass));
+
+struct InComplete; // expected-note{{forward declaration}}
+static_assert(!__is_bitwise_cloneable(InComplete)); // expected-error{{incomplete type 'InComplete' used in type trait expression}}
diff --git a/clang/test/SemaCXX/constexpr-default-arg.cpp b/clang/test/SemaCXX/constexpr-default-arg.cpp
index 901123bfb359..ec9b2927880b 100644
--- a/clang/test/SemaCXX/constexpr-default-arg.cpp
+++ b/clang/test/SemaCXX/constexpr-default-arg.cpp
@@ -32,8 +32,8 @@ void test_default_arg2() {
}
// Check that multiple CXXDefaultInitExprs don't cause an assertion failure.
-struct A { int &&r = 0; };
+struct A { int &&r = 0; }; // expected-note 2{{default member initializer}}
struct B { A x, y; };
-B b = {}; // expected-no-diagnostics
+B b = {}; // expected-warning 2{{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported}}
}
diff --git a/clang/test/SemaCXX/cxx11-default-member-initializers.cpp b/clang/test/SemaCXX/cxx11-default-member-initializers.cpp
index 1ea8b98cd863..dd8e9c6b7fc1 100644
--- a/clang/test/SemaCXX/cxx11-default-member-initializers.cpp
+++ b/clang/test/SemaCXX/cxx11-default-member-initializers.cpp
@@ -27,80 +27,6 @@ class MemInit {
C m = s;
};
-namespace std {
-typedef decltype(sizeof(int)) size_t;
-
-// libc++'s implementation
-template <class _E> class initializer_list {
- const _E *__begin_;
- size_t __size_;
-
- initializer_list(const _E *__b, size_t __s) : __begin_(__b), __size_(__s) {}
-
-public:
- typedef _E value_type;
- typedef const _E &reference;
- typedef const _E &const_reference;
- typedef size_t size_type;
-
- typedef const _E *iterator;
- typedef const _E *const_iterator;
-
- initializer_list() : __begin_(nullptr), __size_(0) {}
-
- size_t size() const { return __size_; }
- const _E *begin() const { return __begin_; }
- const _E *end() const { return __begin_ + __size_; }
-};
-} // namespace std
-
-#if __cplusplus >= 201703L
-namespace test_rebuild {
-template <typename T, int> class C {
-public:
- C(std::initializer_list<T>);
-};
-
-template <typename T> using Ptr = __remove_pointer(T) *;
-template <typename T> C(T) -> C<Ptr<T>, sizeof(T)>;
-
-class A {
-public:
- template <typename T1, typename T2> T1 *some_func(T2 &&);
-};
-
-struct B : A {
- // Test CXXDefaultInitExpr rebuild issue in
- // https://github.com/llvm/llvm-project/pull/87933
- int *ar = some_func<int>(C{some_func<int>(0)});
- B() {}
-};
-
-int TestBody_got;
-template <int> class Vector {
-public:
- Vector(std::initializer_list<int>);
-};
-template <typename... Ts> Vector(Ts...) -> Vector<sizeof...(Ts)>;
-class ProgramBuilder {
-public:
- template <typename T, typename ARGS> int *create(ARGS);
-};
-
-struct TypeTest : ProgramBuilder {
- int *str_f16 = create<int>(Vector{0});
- TypeTest() {}
-};
-class TypeTest_Element_Test : TypeTest {
- void TestBody();
-};
-void TypeTest_Element_Test::TestBody() {
- int *expect = str_f16;
- &TestBody_got != expect; // expected-warning {{inequality comparison result unused}}
-}
-} // namespace test_rebuild
-#endif // __cplusplus >= 201703L
-
#if __cplusplus >= 202002L
// This test ensures cleanup expressions are correctly produced
// in the presence of default member initializers.
diff --git a/clang/test/SemaCXX/eval-crashes.cpp b/clang/test/SemaCXX/eval-crashes.cpp
index a06f60f71e9c..017df977b26b 100644
--- a/clang/test/SemaCXX/eval-crashes.cpp
+++ b/clang/test/SemaCXX/eval-crashes.cpp
@@ -25,9 +25,11 @@ namespace pr33140_0b {
}
namespace pr33140_2 {
- struct A { int &&r = 0; };
+ // FIXME: The declaration of 'b' below should lifetime-extend two int
+ // temporaries.
+ struct A { int &&r = 0; }; // expected-note 2{{initializing field 'r' with default member initializer}}
struct B { A x, y; };
- B b = {};
+ B b = {}; // expected-warning 2{{lifetime extension of temporary created by aggregate initialization using a default member initializer is not yet supported}}
}
namespace pr33140_3 {
diff --git a/clang/test/SemaCXX/nullptr_in_arithmetic_ops.cpp b/clang/test/SemaCXX/nullptr_in_arithmetic_ops.cpp
index 6273d9c42e0b..98bec184164b 100644
--- a/clang/test/SemaCXX/nullptr_in_arithmetic_ops.cpp
+++ b/clang/test/SemaCXX/nullptr_in_arithmetic_ops.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -fsyntax-only -Wno-tautological-pointer-compare -fblocks -std=c++11 -verify %s
+// RUN: %clang_cc1 -fsyntax-only -Wno-tautological-pointer-compare -fblocks -std=c++11 -verify %s -fexperimental-new-constant-interpreter
void foo() {
int a;
diff --git a/clang/test/SemaObjCXX/arc-type-traits.mm b/clang/test/SemaObjCXX/arc-type-traits.mm
index 2d30ae450f3b..25bc8b362140 100644
--- a/clang/test/SemaObjCXX/arc-type-traits.mm
+++ b/clang/test/SemaObjCXX/arc-type-traits.mm
@@ -221,3 +221,12 @@ TRAIT_IS_TRUE(__is_trivially_relocatable, __unsafe_unretained id);
TRAIT_IS_TRUE(__is_trivially_relocatable, HasStrong);
TRAIT_IS_FALSE(__is_trivially_relocatable, HasWeak);
TRAIT_IS_TRUE(__is_trivially_relocatable, HasUnsafeUnretained);
+
+// __is_bitwise_cloneable
+TRAIT_IS_FALSE(__is_bitwise_cloneable, __strong id);
+TRAIT_IS_FALSE(__is_bitwise_cloneable, __weak id);
+TRAIT_IS_FALSE(__is_bitwise_cloneable, __autoreleasing id);
+TRAIT_IS_TRUE(__is_trivial, __unsafe_unretained id);
+TRAIT_IS_FALSE(__is_bitwise_cloneable, HasStrong);
+TRAIT_IS_FALSE(__is_bitwise_cloneable, HasWeak);
+TRAIT_IS_TRUE(__is_bitwise_cloneable, HasUnsafeUnretained);
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
index 487cc53e8ad8..2a1ba4300864 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
@@ -3,8 +3,10 @@
typedef unsigned int u32;
-void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst, u32 size) {
- __builtin_amdgcn_global_load_lds(src, dst, size, /*offset=*/0, /*aux=*/0); // expected-error{{expression is not an integer constant expression}}
+void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst, u32 size, u32 offset, u32 aux) {
+ __builtin_amdgcn_global_load_lds(src, dst, size, /*offset=*/0, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, offset, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, /*offset=*/0, aux); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
diff --git a/clang/unittests/AST/Interp/toAPValue.cpp b/clang/unittests/AST/Interp/toAPValue.cpp
index e56453aba2c5..d6879d6e0bca 100644
--- a/clang/unittests/AST/Interp/toAPValue.cpp
+++ b/clang/unittests/AST/Interp/toAPValue.cpp
@@ -186,3 +186,49 @@ TEST(ToAPValue, FunctionPointersC) {
ASSERT_EQ(I, 17);
}
}
+
+TEST(ToAPValue, MemberPointers) {
+ constexpr char Code[] = "struct S {\n"
+ " int m, n;\n"
+ "};\n"
+ "constexpr int S::*pm = &S::m;\n"
+ "constexpr int S::*nn = nullptr;\n";
+
+ auto AST = tooling::buildASTFromCodeWithArgs(
+ Code, {"-fexperimental-new-constant-interpreter"});
+
+ auto &Ctx = AST->getASTContext().getInterpContext();
+ Program &Prog = Ctx.getProgram();
+
+ auto getDecl = [&](const char *Name) -> const ValueDecl * {
+ auto Nodes =
+ match(valueDecl(hasName(Name)).bind("var"), AST->getASTContext());
+ assert(Nodes.size() == 1);
+ const auto *D = Nodes[0].getNodeAs<ValueDecl>("var");
+ assert(D);
+ return D;
+ };
+
+ auto getGlobalPtr = [&](const char *Name) -> Pointer {
+ const VarDecl *D = cast<VarDecl>(getDecl(Name));
+ return Prog.getPtrGlobal(*Prog.getGlobal(D));
+ };
+
+ {
+ const Pointer &GP = getGlobalPtr("pm");
+ ASSERT_TRUE(GP.isLive());
+ const MemberPointer &FP = GP.deref<MemberPointer>();
+ APValue A = FP.toAPValue();
+ ASSERT_EQ(A.getMemberPointerDecl(), getDecl("m"));
+ ASSERT_EQ(A.getKind(), APValue::MemberPointer);
+ }
+
+ {
+ const Pointer &GP = getGlobalPtr("nn");
+ ASSERT_TRUE(GP.isLive());
+ const MemberPointer &NP = GP.deref<MemberPointer>();
+ ASSERT_TRUE(NP.isZero());
+ APValue A = NP.toAPValue();
+ ASSERT_EQ(A.getKind(), APValue::MemberPointer);
+ }
+}
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 4e427268fb82..dbc1916825f3 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -9241,6 +9241,14 @@ TEST_F(FormatTest, AlignsAfterOpenBracket) {
" b));",
Style);
+ Style.ColumnLimit = 30;
+ verifyFormat("for (int foo = 0; foo < FOO;\n"
+ " ++foo) {\n"
+ " bar(foo);\n"
+ "}",
+ Style);
+ Style.ColumnLimit = 80;
+
Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak;
Style.BinPackArguments = false;
Style.BinPackParameters = false;
diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html
index b046468c8531..43857447d83b 100755
--- a/clang/www/cxx_dr_status.html
+++ b/clang/www/cxx_dr_status.html
@@ -10698,7 +10698,7 @@ and <I>POD class</I></td>
<td><a href="https://cplusplus.github.io/CWG/issues/1815.html">1815</a></td>
<td>CD4</td>
<td>Lifetime extension in aggregate initialization</td>
- <td class="unreleased" align="center">Clang 19</td>
+ <td class="none" align="center">No</td>
</tr>
<tr id="1816">
<td><a href="https://cplusplus.github.io/CWG/issues/1816.html">1816</a></td>
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_bitvector.h b/compiler-rt/lib/sanitizer_common/sanitizer_bitvector.h
index 07a59ab11c42..eef1e7e9d957 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_bitvector.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_bitvector.h
@@ -321,23 +321,23 @@ class TwoLevelBitVector {
};
private:
- void check(uptr idx) const { CHECK_LE(idx, size()); }
+ void check(uptr idx) const { CHECK_LT(idx, size()); }
uptr idx0(uptr idx) const {
uptr res = idx / (BV::kSize * BV::kSize);
- CHECK_LE(res, kLevel1Size);
+ CHECK_LT(res, kLevel1Size);
return res;
}
uptr idx1(uptr idx) const {
uptr res = (idx / BV::kSize) % BV::kSize;
- CHECK_LE(res, BV::kSize);
+ CHECK_LT(res, BV::kSize);
return res;
}
uptr idx2(uptr idx) const {
uptr res = idx % BV::kSize;
- CHECK_LE(res, BV::kSize);
+ CHECK_LT(res, BV::kSize);
return res;
}
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
index cccbb4d256df..6d05411222d9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
@@ -54,6 +54,8 @@
# undef MAP_NORESERVE
# define MAP_NORESERVE 0
extern const Elf_Auxinfo *__elf_aux_vector;
+extern "C" int __sys_sigaction(int signum, const struct sigaction *act,
+ struct sigaction *oldact);
# endif
# if SANITIZER_NETBSD
@@ -93,12 +95,22 @@ SANITIZER_WEAK_ATTRIBUTE int real_sigaction(int signum, const void *act,
void *oldact);
int internal_sigaction(int signum, const void *act, void *oldact) {
-# if !SANITIZER_GO
+# if SANITIZER_FREEBSD
+ // On FreeBSD, call the sigaction syscall directly (part of libsys in FreeBSD
+ // 15) since the libc version goes via a global interposing table. Due to
+ // library initialization order the table can be relocated after the call to
+ // InitializeDeadlySignals() which then crashes when dereferencing the
+ // uninitialized pointer in libc.
+ return __sys_sigaction(signum, (const struct sigaction *)act,
+ (struct sigaction *)oldact);
+# else
+# if !SANITIZER_GO
if (&real_sigaction)
return real_sigaction(signum, act, oldact);
-# endif
+# endif
return sigaction(signum, (const struct sigaction *)act,
(struct sigaction *)oldact);
+# endif
}
void GetThreadStackTopAndBottom(bool at_initialization, uptr *stack_top,
diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp
index 58f2c8f7b333..1a1ccce82d25 100644
--- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp
+++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp
@@ -69,12 +69,17 @@ const uptr kAllocatorSpace = ~(uptr)0;
const uptr kAllocatorSize = 0x2000000000ULL; // 128G.
static const u64 kAddressSpaceSize = 1ULL << 38;
typedef VeryDenseSizeClassMap SizeClassMap;
-#else
+# elif SANITIZER_APPLE
static const uptr kAllocatorSpace = 0x700000000000ULL;
static const uptr kAllocatorSize = 0x010000000000ULL; // 1T.
static const u64 kAddressSpaceSize = 1ULL << 47;
typedef DefaultSizeClassMap SizeClassMap;
-#endif
+# else
+static const uptr kAllocatorSpace = 0x500000000000ULL;
+static const uptr kAllocatorSize = 0x010000000000ULL; // 1T.
+static const u64 kAddressSpaceSize = 1ULL << 47;
+typedef DefaultSizeClassMap SizeClassMap;
+# endif
template <typename AddressSpaceViewTy>
struct AP64 { // Allocator Params. Short name for shorter demangled names..
diff --git a/compiler-rt/test/dfsan/sscanf.c b/compiler-rt/test/dfsan/sscanf.c
new file mode 100644
index 000000000000..dbc2de4ba96c
--- /dev/null
+++ b/compiler-rt/test/dfsan/sscanf.c
@@ -0,0 +1,19 @@
+// RUN: %clang_dfsan %s -o %t && %run %t
+// XFAIL: *
+
+#include <assert.h>
+#include <stdio.h>
+
+int main(int argc, char *argv[]) {
+ char buf[256] = "10000000000-100000000000 rw-p 00000000 00:00 0";
+ long rss = 0;
+ // This test exposes a bug in DFSan's sscanf, that leads to flakiness
+ // in release_shadow_space.c (see
+ // https://github.com/llvm/llvm-project/issues/91287)
+ if (sscanf(buf, "Garbage text before, %ld, Garbage text after", &rss) == 1) {
+ printf("Error: matched %ld\n", rss);
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/flang/cmake/modules/AddFlangOffloadRuntime.cmake b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
index 0af12c8cfd54..6407be5d038b 100644
--- a/flang/cmake/modules/AddFlangOffloadRuntime.cmake
+++ b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
@@ -101,6 +101,7 @@ macro(enable_omp_offload_compilation files)
"gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030"
"gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
"gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151"
+ "gfx1152"
)
set(all_nvptx_architectures
"sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md
index 41129b10083b..8853d4d9e1c7 100644
--- a/flang/docs/Intrinsics.md
+++ b/flang/docs/Intrinsics.md
@@ -967,4 +967,35 @@ program test_etime
print *, tarray(1)
print *, tarray(2)
end program test_etime
+```
+
+### Non-Standard Intrinsics: GETCWD
+
+#### Description
+`GETCWD(C, STATUS)` returns the current working directory.
+
+This intrinsic is provided in both subroutine and function forms; however, only one form can be used in any given program unit.
+
+*C* and *STATUS* are `INTENT(OUT)` and provide the following:
+
+| | |
+|------------|---------------------------------------------------------------------------------------------------|
+| `C` | Current working directory. The type shall be `CHARACTER` and of default kind. |
+| `STATUS` | (Optional) Status flag. Returns 0 on success, a system specific and nonzero error code otherwise. The type shall be `INTEGER` and of a kind greater or equal to 4. |
+
+#### Usage and Info
+
+- **Standard:** GNU extension
+- **Class:** Subroutine, function
+- **Syntax:** `CALL GETCWD(C, STATUS)`, `STATUS = GETCWD(C)`
+
+#### Example
+```Fortran
+PROGRAM example_getcwd
+ CHARACTER(len=255) :: cwd
+ INTEGER :: status
+ CALL getcwd(cwd, status)
+ PRINT *, cwd
+ PRINT *, status
+END PROGRAM
``` \ No newline at end of file
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index c47e41eab18b..8ef5d59b92f0 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -232,6 +232,8 @@ struct IntrinsicLibrary {
mlir::Value genFloor(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genFraction(mlir::Type resultType,
mlir::ArrayRef<mlir::Value> args);
+ fir::ExtendedValue genGetCwd(std::optional<mlir::Type> resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args);
void genGetCommand(mlir::ArrayRef<fir::ExtendedValue> args);
mlir::Value genGetPID(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Command.h b/flang/include/flang/Optimizer/Builder/Runtime/Command.h
index 976fb3aa0b6f..0d60a367d999 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Command.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Command.h
@@ -53,5 +53,10 @@ mlir::Value genGetEnvVariable(fir::FirOpBuilder &, mlir::Location,
mlir::Value length, mlir::Value trimName,
mlir::Value errmsg);
+/// Generate a call to the GetCwd runtime function which implements
+/// the GETCWD intrinsic.
+mlir::Value genGetCwd(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value c);
+
} // namespace fir::runtime
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_COMMAND_H
diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
index 0c34b640a5c9..aedb6769186e 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td
+++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
@@ -67,6 +67,36 @@ def fir_BoxFieldAttr : I32EnumAttr<
let cppNamespace = "fir";
}
+def fir_ReduceOperationEnum : I32BitEnumAttr<"ReduceOperationEnum",
+ "intrinsic operations and functions supported by DO CONCURRENT REDUCE",
+ [
+ I32BitEnumAttrCaseBit<"Add", 0, "add">,
+ I32BitEnumAttrCaseBit<"Multiply", 1, "multiply">,
+ I32BitEnumAttrCaseBit<"AND", 2, "and">,
+ I32BitEnumAttrCaseBit<"OR", 3, "or">,
+ I32BitEnumAttrCaseBit<"EQV", 4, "eqv">,
+ I32BitEnumAttrCaseBit<"NEQV", 5, "neqv">,
+ I32BitEnumAttrCaseBit<"MAX", 6, "max">,
+ I32BitEnumAttrCaseBit<"MIN", 7, "min">,
+ I32BitEnumAttrCaseBit<"IAND", 8, "iand">,
+ I32BitEnumAttrCaseBit<"IOR", 9, "ior">,
+ I32BitEnumAttrCaseBit<"EIOR", 10, "eior">
+ ]> {
+ let separator = ", ";
+ let cppNamespace = "::fir";
+ let printBitEnumPrimaryGroups = 1;
+}
+
+def fir_ReduceAttr : fir_Attr<"Reduce"> {
+ let mnemonic = "reduce_attr";
+
+ let parameters = (ins
+ "ReduceOperationEnum":$reduce_operation
+ );
+
+ let assemblyFormat = "`<` $reduce_operation `>`";
+}
+
// mlir::SideEffects::Resource for modelling operations which add debugging information
def DebuggingResource : Resource<"::fir::DebuggingResource">;
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 37fbd1f9692a..e7da3af5485c 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -2125,8 +2125,8 @@ class region_Op<string mnemonic, list<Trait> traits = []> :
let hasVerifier = 1;
}
-def fir_DoLoopOp : region_Op<"do_loop",
- [DeclareOpInterfaceMethods<LoopLikeOpInterface,
+def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
+ DeclareOpInterfaceMethods<LoopLikeOpInterface,
["getYieldedValuesMutable"]>]> {
let summary = "generalized loop operation";
let description = [{
@@ -2156,9 +2156,11 @@ def fir_DoLoopOp : region_Op<"do_loop",
Index:$lowerBound,
Index:$upperBound,
Index:$step,
+ Variadic<AnyType>:$reduceOperands,
Variadic<AnyType>:$initArgs,
OptionalAttr<UnitAttr>:$unordered,
- OptionalAttr<UnitAttr>:$finalValue
+ OptionalAttr<UnitAttr>:$finalValue,
+ OptionalAttr<ArrayAttr>:$reduceAttrs
);
let results = (outs Variadic<AnyType>:$results);
let regions = (region SizedRegion<1>:$region);
@@ -2169,6 +2171,8 @@ def fir_DoLoopOp : region_Op<"do_loop",
"mlir::Value":$step, CArg<"bool", "false">:$unordered,
CArg<"bool", "false">:$finalCountValue,
CArg<"mlir::ValueRange", "std::nullopt">:$iterArgs,
+ CArg<"mlir::ValueRange", "std::nullopt">:$reduceOperands,
+ CArg<"llvm::ArrayRef<mlir::Attribute>", "{}">:$reduceAttrs,
CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>
];
@@ -2181,11 +2185,12 @@ def fir_DoLoopOp : region_Op<"do_loop",
return getBody()->getArguments().drop_front();
}
mlir::Operation::operand_range getIterOperands() {
- return getOperands().drop_front(getNumControlOperands());
+ return getOperands()
+ .drop_front(getNumControlOperands() + getNumReduceOperands());
}
llvm::MutableArrayRef<mlir::OpOperand> getInitsMutable() {
- return
- getOperation()->getOpOperands().drop_front(getNumControlOperands());
+ return getOperation()->getOpOperands()
+ .drop_front(getNumControlOperands() + getNumReduceOperands());
}
void setLowerBound(mlir::Value bound) { (*this)->setOperand(0, bound); }
@@ -2200,11 +2205,25 @@ def fir_DoLoopOp : region_Op<"do_loop",
unsigned getNumControlOperands() { return 3; }
/// Does the operation hold operands for loop-carried values
bool hasIterOperands() {
- return (*this)->getNumOperands() > getNumControlOperands();
+ return getNumIterOperands() > 0;
+ }
+ /// Does the operation hold operands for reduction variables
+ bool hasReduceOperands() {
+ return getNumReduceOperands() > 0;
+ }
+ /// Get Number of variadic operands
+ unsigned getNumOperands(unsigned idx) {
+ auto segments = (*this)->getAttrOfType<mlir::DenseI32ArrayAttr>(
+ getOperandSegmentSizeAttr());
+ return static_cast<unsigned>(segments[idx]);
+ }
+ // Get Number of reduction operands
+ unsigned getNumReduceOperands() {
+ return getNumOperands(3);
}
/// Get Number of loop-carried values
unsigned getNumIterOperands() {
- return (*this)->getNumOperands() - getNumControlOperands();
+ return getNumOperands(4);
}
/// Get the body of the loop
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index a7ba704fdb39..2d43f4d4c55b 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -60,9 +60,6 @@ std::unique_ptr<mlir::Pass> createAffineDemotionPass();
std::unique_ptr<mlir::Pass>
createArrayValueCopyPass(fir::ArrayValueCopyOptions options = {});
std::unique_ptr<mlir::Pass> createCFGConversionPassWithNSW();
-std::unique_ptr<mlir::Pass> createExternalNameConversionPass();
-std::unique_ptr<mlir::Pass>
-createExternalNameConversionPass(bool appendUnderscore);
std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
std::unique_ptr<mlir::Pass>
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 82638200e5e2..cac590a8da00 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -163,7 +163,6 @@ def ExternalNameConversion : Pass<"external-name-interop", "mlir::ModuleOp"> {
let description = [{
Demangle FIR internal name and mangle them for external interoperability.
}];
- let constructor = "::fir::createExternalNameConversionPass()";
let options = [
Option<"appendUnderscoreOpt", "append-underscore",
"bool", /*default=*/"true",
diff --git a/flang/include/flang/Runtime/command.h b/flang/include/flang/Runtime/command.h
index c67d171c8e2f..7ab3f6442dcf 100644
--- a/flang/include/flang/Runtime/command.h
+++ b/flang/include/flang/Runtime/command.h
@@ -55,6 +55,10 @@ std::int32_t RTNAME(GetEnvVariable)(const Descriptor &name,
const Descriptor *value = nullptr, const Descriptor *length = nullptr,
bool trim_name = true, const Descriptor *errmsg = nullptr,
const char *sourceFile = nullptr, int line = 0);
+
+// Calls getcwd()
+std::int32_t RTNAME(GetCwd)(
+ const Descriptor &cwd, const char *sourceFile, int line);
}
} // namespace Fortran::runtime
diff --git a/flang/include/flang/Runtime/magic-numbers.h b/flang/include/flang/Runtime/magic-numbers.h
index 38ccc5e7d3df..1cded1fd6323 100644
--- a/flang/include/flang/Runtime/magic-numbers.h
+++ b/flang/include/flang/Runtime/magic-numbers.h
@@ -69,6 +69,11 @@ Additional status code for a bad pointer DEALLOCATE.
#define FORTRAN_RUNTIME_STAT_BAD_POINTER_DEALLOCATION 110
#if 0
+Status codes for GETCWD.
+#endif
+#define FORTRAN_RUNTIME_STAT_MISSING_CWD 111
+
+#if 0
ieee_class_type values
The sequence is that of F18 Clause 17.2p3, but nothing depends on that.
#endif
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index c5c35e9a6a33..d0399d65f565 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -233,9 +233,8 @@ inline void addBoxedProcedurePass(mlir::PassManager &pm) {
inline void addExternalNameConversionPass(
mlir::PassManager &pm, bool appendUnderscore = true) {
- addPassConditionally(pm, disableExternalNameConversion, [&]() {
- return fir::createExternalNameConversionPass(appendUnderscore);
- });
+ addPassConditionally(pm, disableExternalNameConversion,
+ [&]() { return fir::createExternalNameConversion({appendUnderscore}); });
}
// Use inliner extension point callback to register the default inliner pass.
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index 12d13422574b..58c1b6989f49 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -514,6 +514,10 @@ static const IntrinsicInterface genericIntrinsicFunction[]{
{"gamma", {{"x", SameReal}}, SameReal},
{"get_team", {{"level", DefaultInt, Rank::scalar, Optionality::optional}},
TeamType, Rank::scalar, IntrinsicClass::transformationalFunction},
+ {"getcwd",
+ {{"c", DefaultChar, Rank::scalar, Optionality::required,
+ common::Intent::Out}},
+ TypePattern{IntType, KindCode::greaterOrEqualToKind, 4}},
{"getpid", {}, DefaultInt},
{"huge",
{{"x", SameIntOrReal, Rank::anyOrAssumedRank, Optionality::required,
@@ -1406,6 +1410,12 @@ static const IntrinsicInterface intrinsicSubroutine[]{
{"errmsg", DefaultChar, Rank::scalar, Optionality::optional,
common::Intent::InOut}},
{}, Rank::elemental, IntrinsicClass::impureSubroutine},
+ {"getcwd",
+ {{"c", DefaultChar, Rank::scalar, Optionality::required,
+ common::Intent::Out},
+ {"status", TypePattern{IntType, KindCode::greaterOrEqualToKind, 4},
+ Rank::scalar, Optionality::optional, common::Intent::Out}},
+ {}, Rank::elemental, IntrinsicClass::impureSubroutine},
{"move_alloc",
{{"from", SameType, Rank::known, Optionality::required,
common::Intent::InOut},
@@ -2574,7 +2584,7 @@ bool IntrinsicProcTable::Implementation::IsDualIntrinsic(
const std::string &name) const {
// Collection for some intrinsics with function and subroutine form,
// in order to pass the semantic check.
- static const std::string dualIntrinsic[]{{"etime"}};
+ static const std::string dualIntrinsic[]{{"etime"}, {"getcwd"}};
return std::find_if(std::begin(dualIntrinsic), std::end(dualIntrinsic),
[&name](const std::string &dualName) {
diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h
index f7cd0ea83ad1..98fb5dcf7722 100644
--- a/flang/lib/Lower/OpenMP/Clauses.h
+++ b/flang/lib/Lower/OpenMP/Clauses.h
@@ -36,30 +36,64 @@ struct TypeTy : public evaluate::SomeType {
bool operator==(const TypeTy &t) const { return true; }
};
-using IdTy = semantics::Symbol *;
+template <typename ExprTy>
+struct IdTyTemplate {
+ // "symbol" is always non-null for id's of actual objects.
+ Fortran::semantics::Symbol *symbol;
+ std::optional<ExprTy> designator;
+
+ bool operator==(const IdTyTemplate &other) const {
+ // If symbols are different, then the objects are different.
+ if (symbol != other.symbol)
+ return false;
+ if (symbol == nullptr)
+ return true;
+ // Equal symbols don't necessarily indicate identical objects,
+ // for example, a derived object component may use a single symbol,
+ // which will refer to different objects for different designators,
+ // e.g. a%c and b%c.
+ return designator == other.designator;
+ }
+
+ operator bool() const { return symbol != nullptr; }
+};
+
using ExprTy = SomeExpr;
template <typename T>
using List = tomp::ListT<T>;
} // namespace Fortran::lower::omp
+// Specialization of the ObjectT template
namespace tomp::type {
template <>
-struct ObjectT<Fortran::lower::omp::IdTy, Fortran::lower::omp::ExprTy> {
- using IdTy = Fortran::lower::omp::IdTy;
+struct ObjectT<Fortran::lower::omp::IdTyTemplate<Fortran::lower::omp::ExprTy>,
+ Fortran::lower::omp::ExprTy> {
+ using IdTy = Fortran::lower::omp::IdTyTemplate<Fortran::lower::omp::ExprTy>;
using ExprTy = Fortran::lower::omp::ExprTy;
- IdTy id() const { return symbol; }
- Fortran::semantics::Symbol *sym() const { return symbol; }
- const std::optional<ExprTy> &ref() const { return designator; }
+ IdTy id() const { return identity; }
+ Fortran::semantics::Symbol *sym() const { return identity.symbol; }
+ const std::optional<ExprTy> &ref() const { return identity.designator; }
- IdTy symbol;
- std::optional<ExprTy> designator;
+ IdTy identity;
};
} // namespace tomp::type
namespace Fortran::lower::omp {
+using IdTy = IdTyTemplate<ExprTy>;
+}
+namespace std {
+template <>
+struct hash<Fortran::lower::omp::IdTy> {
+ size_t operator()(const Fortran::lower::omp::IdTy &id) const {
+ return static_cast<size_t>(reinterpret_cast<uintptr_t>(id.symbol));
+ }
+};
+} // namespace std
+
+namespace Fortran::lower::omp {
using Object = tomp::ObjectT<IdTy, ExprTy>;
using ObjectList = tomp::ObjectListT<IdTy, ExprTy>;
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index 80a956de35ba..fb340e6fdb10 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -44,7 +44,9 @@ private:
void Post(const T &) {}
bool Pre(const parser::OpenMPConstruct &omp) {
- currentConstruct = &omp;
+ // Skip constructs that may not have privatizations.
+ if (!std::holds_alternative<parser::OpenMPCriticalConstruct>(omp.u))
+ currentConstruct = &omp;
return true;
}
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index eff915f569f2..da94352a84a7 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -188,7 +188,7 @@ void addChildIndexAndMapToParent(
std::map<const semantics::Symbol *,
llvm::SmallVector<OmpMapMemberIndicesData>> &parentMemberIndices,
mlir::omp::MapInfoOp &mapOp, semantics::SemanticsContext &semaCtx) {
- std::optional<evaluate::DataRef> dataRef = ExtractDataRef(object.designator);
+ std::optional<evaluate::DataRef> dataRef = ExtractDataRef(object.ref());
assert(dataRef.has_value() &&
"DataRef could not be extracted during mapping of derived type "
"cannot proceed");
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 072899751bc8..d3f6fa16ac80 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -280,6 +280,10 @@ static constexpr IntrinsicHandler handlers[]{
{"trim_name", asAddr, handleDynamicOptional},
{"errmsg", asBox, handleDynamicOptional}}},
/*isElemental=*/false},
+ {"getcwd",
+ &I::genGetCwd,
+ {{{"c", asBox}, {"status", asAddr, handleDynamicOptional}}},
+ /*isElemental=*/false},
{"getpid", &I::genGetPID},
{"iachar", &I::genIchar},
{"iall",
@@ -3476,6 +3480,37 @@ mlir::Value IntrinsicLibrary::genFraction(mlir::Type resultType,
fir::runtime::genFraction(builder, loc, fir::getBase(args[0])));
}
+// GETCWD
+fir::ExtendedValue
+IntrinsicLibrary::genGetCwd(std::optional<mlir::Type> resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert((args.size() == 1 && resultType.has_value()) ||
+ (args.size() >= 1 && !resultType.has_value()));
+
+ mlir::Value cwd = fir::getBase(args[0]);
+ mlir::Value statusValue = fir::runtime::genGetCwd(builder, loc, cwd);
+
+ if (resultType.has_value()) {
+ // Function form, return status.
+ return statusValue;
+ } else {
+ // Subroutine form, store status and return none.
+ const fir::ExtendedValue &status = args[1];
+ if (!isStaticallyAbsent(status)) {
+ mlir::Value statusAddr = fir::getBase(status);
+ mlir::Value statusIsPresentAtRuntime =
+ builder.genIsNotNullAddr(loc, statusAddr);
+ builder.genIfThen(loc, statusIsPresentAtRuntime)
+ .genThen([&]() {
+ builder.createStoreWithConvert(loc, statusValue, statusAddr);
+ })
+ .end();
+ }
+ }
+
+ return {};
+}
+
// GET_COMMAND
void IntrinsicLibrary::genGetCommand(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 4);
diff --git a/flang/lib/Optimizer/Builder/Runtime/Command.cpp b/flang/lib/Optimizer/Builder/Runtime/Command.cpp
index 1d719e7bbd9a..8320d89493b3 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Command.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Command.cpp
@@ -88,3 +88,16 @@ mlir::Value fir::runtime::genGetEnvVariable(fir::FirOpBuilder &builder,
sourceFile, sourceLine);
return builder.create<fir::CallOp>(loc, runtimeFunc, args).getResult(0);
}
+
+mlir::Value fir::runtime::genGetCwd(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value cwd) {
+ mlir::func::FuncOp func =
+ fir::runtime::getRuntimeFunc<mkRTKey(GetCwd)>(loc, builder);
+ auto runtimeFuncTy = func.getFunctionType();
+ mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
+ mlir::Value sourceLine =
+ fir::factory::locationToLineNo(builder, loc, runtimeFuncTy.getInput(2));
+ llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
+ builder, loc, runtimeFuncTy, cwd, sourceFile, sourceLine);
+ return builder.create<fir::CallOp>(loc, func, args).getResult(0);
+}
diff --git a/flang/lib/Optimizer/Dialect/FIRAttr.cpp b/flang/lib/Optimizer/Dialect/FIRAttr.cpp
index 2faba63dfba0..a0202a015922 100644
--- a/flang/lib/Optimizer/Dialect/FIRAttr.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRAttr.cpp
@@ -297,6 +297,6 @@ void fir::printFirAttribute(FIROpsDialect *dialect, mlir::Attribute attr,
void FIROpsDialect::registerAttributes() {
addAttributes<ClosedIntervalAttr, ExactTypeAttr, FortranVariableFlagsAttr,
- LowerBoundAttr, PointIntervalAttr, RealAttr, SubclassAttr,
- UpperBoundAttr>();
+ LowerBoundAttr, PointIntervalAttr, RealAttr, ReduceAttr,
+ SubclassAttr, UpperBoundAttr>();
}
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index b530a9dc1bcc..75ca738211ab 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -2456,9 +2456,16 @@ void fir::DoLoopOp::build(mlir::OpBuilder &builder,
mlir::OperationState &result, mlir::Value lb,
mlir::Value ub, mlir::Value step, bool unordered,
bool finalCountValue, mlir::ValueRange iterArgs,
+ mlir::ValueRange reduceOperands,
+ llvm::ArrayRef<mlir::Attribute> reduceAttrs,
llvm::ArrayRef<mlir::NamedAttribute> attributes) {
result.addOperands({lb, ub, step});
+ result.addOperands(reduceOperands);
result.addOperands(iterArgs);
+ result.addAttribute(getOperandSegmentSizeAttr(),
+ builder.getDenseI32ArrayAttr(
+ {1, 1, 1, static_cast<int32_t>(reduceOperands.size()),
+ static_cast<int32_t>(iterArgs.size())}));
if (finalCountValue) {
result.addTypes(builder.getIndexType());
result.addAttribute(getFinalValueAttrName(result.name),
@@ -2477,6 +2484,9 @@ void fir::DoLoopOp::build(mlir::OpBuilder &builder,
if (unordered)
result.addAttribute(getUnorderedAttrName(result.name),
builder.getUnitAttr());
+ if (!reduceAttrs.empty())
+ result.addAttribute(getReduceAttrsAttrName(result.name),
+ builder.getArrayAttr(reduceAttrs));
result.addAttributes(attributes);
}
@@ -2502,24 +2512,51 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser,
if (mlir::succeeded(parser.parseOptionalKeyword("unordered")))
result.addAttribute("unordered", builder.getUnitAttr());
+ // Parse the reduction arguments.
+ llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> reduceOperands;
+ llvm::SmallVector<mlir::Type> reduceArgTypes;
+ if (succeeded(parser.parseOptionalKeyword("reduce"))) {
+ // Parse reduction attributes and variables.
+ llvm::SmallVector<ReduceAttr> attributes;
+ if (failed(parser.parseCommaSeparatedList(
+ mlir::AsmParser::Delimiter::Paren, [&]() {
+ if (parser.parseAttribute(attributes.emplace_back()) ||
+ parser.parseArrow() ||
+ parser.parseOperand(reduceOperands.emplace_back()) ||
+ parser.parseColonType(reduceArgTypes.emplace_back()))
+ return mlir::failure();
+ return mlir::success();
+ })))
+ return mlir::failure();
+ // Resolve input operands.
+ for (auto operand_type : llvm::zip(reduceOperands, reduceArgTypes))
+ if (parser.resolveOperand(std::get<0>(operand_type),
+ std::get<1>(operand_type), result.operands))
+ return mlir::failure();
+ llvm::SmallVector<mlir::Attribute> arrayAttr(attributes.begin(),
+ attributes.end());
+ result.addAttribute(getReduceAttrsAttrName(result.name),
+ builder.getArrayAttr(arrayAttr));
+ }
+
// Parse the optional initial iteration arguments.
llvm::SmallVector<mlir::OpAsmParser::Argument> regionArgs;
- llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> operands;
+ llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> iterOperands;
llvm::SmallVector<mlir::Type> argTypes;
bool prependCount = false;
regionArgs.push_back(inductionVariable);
if (succeeded(parser.parseOptionalKeyword("iter_args"))) {
// Parse assignment list and results type list.
- if (parser.parseAssignmentList(regionArgs, operands) ||
+ if (parser.parseAssignmentList(regionArgs, iterOperands) ||
parser.parseArrowTypeList(result.types))
return mlir::failure();
- if (result.types.size() == operands.size() + 1)
+ if (result.types.size() == iterOperands.size() + 1)
prependCount = true;
// Resolve input operands.
llvm::ArrayRef<mlir::Type> resTypes = result.types;
- for (auto operand_type :
- llvm::zip(operands, prependCount ? resTypes.drop_front() : resTypes))
+ for (auto operand_type : llvm::zip(
+ iterOperands, prependCount ? resTypes.drop_front() : resTypes))
if (parser.resolveOperand(std::get<0>(operand_type),
std::get<1>(operand_type), result.operands))
return mlir::failure();
@@ -2530,6 +2567,12 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser,
prependCount = true;
}
+ // Set the operandSegmentSizes attribute
+ result.addAttribute(getOperandSegmentSizeAttr(),
+ builder.getDenseI32ArrayAttr(
+ {1, 1, 1, static_cast<int32_t>(reduceOperands.size()),
+ static_cast<int32_t>(iterOperands.size())}));
+
if (parser.parseOptionalAttrDictWithKeyword(result.attributes))
return mlir::failure();
@@ -2606,6 +2649,10 @@ mlir::LogicalResult fir::DoLoopOp::verify() {
i++;
}
+ auto reduceAttrs = getReduceAttrsAttr();
+ if (getNumReduceOperands() != (reduceAttrs ? reduceAttrs.size() : 0))
+ return emitOpError(
+ "mismatch in number of reduction variables and reduction attributes");
return mlir::success();
}
@@ -2615,6 +2662,17 @@ void fir::DoLoopOp::print(mlir::OpAsmPrinter &p) {
<< getUpperBound() << " step " << getStep();
if (getUnordered())
p << " unordered";
+ if (hasReduceOperands()) {
+ p << " reduce(";
+ auto attrs = getReduceAttrsAttr();
+ auto operands = getReduceOperands();
+ llvm::interleaveComma(llvm::zip(attrs, operands), p, [&](auto it) {
+ p << std::get<0>(it) << " -> " << std::get<1>(it) << " : "
+ << std::get<1>(it).getType();
+ });
+ p << ')';
+ printBlockTerminators = true;
+ }
if (hasIterOperands()) {
p << " iter_args(";
auto regionArgs = getRegionIterArgs();
@@ -2628,8 +2686,9 @@ void fir::DoLoopOp::print(mlir::OpAsmPrinter &p) {
p << " -> " << getResultTypes();
printBlockTerminators = true;
}
- p.printOptionalAttrDictWithKeyword((*this)->getAttrs(),
- {"unordered", "finalValue"});
+ p.printOptionalAttrDictWithKeyword(
+ (*this)->getAttrs(),
+ {"unordered", "finalValue", "reduceAttrs", "operandSegmentSizes"});
p << ' ';
p.printRegion(getRegion(), /*printEntryBlockArgs=*/false,
printBlockTerminators);
diff --git a/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp b/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp
index b265c74c33dd..648628fd1c9a 100644
--- a/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp
@@ -45,17 +45,11 @@ namespace {
class ExternalNameConversionPass
: public fir::impl::ExternalNameConversionBase<ExternalNameConversionPass> {
public:
- ExternalNameConversionPass(bool appendUnderscoring)
- : appendUnderscores(appendUnderscoring) {}
-
- ExternalNameConversionPass() { usePassOpt = true; }
+ using ExternalNameConversionBase<
+ ExternalNameConversionPass>::ExternalNameConversionBase;
mlir::ModuleOp getModule() { return getOperation(); }
void runOnOperation() override;
-
-private:
- bool appendUnderscores;
- bool usePassOpt = false;
};
} // namespace
@@ -63,7 +57,6 @@ void ExternalNameConversionPass::runOnOperation() {
auto op = getOperation();
auto *context = &getContext();
- appendUnderscores = (usePassOpt) ? appendUnderscoreOpt : appendUnderscores;
llvm::DenseMap<mlir::StringAttr, mlir::FlatSymbolRefAttr> remappings;
// Update names of external Fortran functions and names of Common Block
// globals.
@@ -74,7 +67,8 @@ void ExternalNameConversionPass::runOnOperation() {
mlir::SymbolTable::getSymbolAttrName());
auto deconstructedName = fir::NameUniquer::deconstruct(symName);
if (fir::NameUniquer::isExternalFacingUniquedName(deconstructedName)) {
- auto newName = mangleExternalName(deconstructedName, appendUnderscores);
+ auto newName =
+ mangleExternalName(deconstructedName, appendUnderscoreOpt);
auto newAttr = mlir::StringAttr::get(context, newName);
mlir::SymbolTable::setSymbolName(&funcOrGlobal, newAttr);
auto newSymRef = mlir::FlatSymbolRefAttr::get(newAttr);
@@ -101,12 +95,3 @@ void ExternalNameConversionPass::runOnOperation() {
nestedOp->setAttr(update.first, update.second);
});
}
-
-std::unique_ptr<mlir::Pass> fir::createExternalNameConversionPass() {
- return std::make_unique<ExternalNameConversionPass>();
-}
-
-std::unique_ptr<mlir::Pass>
-fir::createExternalNameConversionPass(bool appendUnderscoring) {
- return std::make_unique<ExternalNameConversionPass>(appendUnderscoring);
-}
diff --git a/flang/runtime/command.cpp b/flang/runtime/command.cpp
index b573c5dfd797..e642248a25e6 100644
--- a/flang/runtime/command.cpp
+++ b/flang/runtime/command.cpp
@@ -17,12 +17,19 @@
#ifdef _WIN32
#include "flang/Common/windows-include.h"
+#include <direct.h>
+#define getcwd _getcwd
+#define PATH_MAX MAX_PATH
// On Windows GetCurrentProcessId returns a DWORD aka uint32_t
#include <processthreadsapi.h>
inline pid_t getpid() { return GetCurrentProcessId(); }
#else
#include <unistd.h> //getpid()
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
#endif
namespace Fortran::runtime {
@@ -239,4 +246,23 @@ std::int32_t RTNAME(GetEnvVariable)(const Descriptor &name,
return StatOk;
}
+std::int32_t RTNAME(GetCwd)(
+ const Descriptor &cwd, const char *sourceFile, int line) {
+ Terminator terminator{sourceFile, line};
+
+ RUNTIME_CHECK(terminator, IsValidCharDescriptor(&cwd));
+
+ char *buf{(char *)AllocateMemoryOrCrash(terminator, PATH_MAX)};
+
+ if (!getcwd(buf, PATH_MAX)) {
+ return StatMissingCurrentWorkDirectory;
+ }
+
+ std::int64_t strLen{StringLength(buf)};
+ std::int32_t status{CopyCharsToDescriptor(cwd, buf, strLen)};
+
+ std::free(buf);
+ return status;
+}
+
} // namespace Fortran::runtime
diff --git a/flang/runtime/stat.h b/flang/runtime/stat.h
index 4f46f52ecb29..71faeb027d90 100644
--- a/flang/runtime/stat.h
+++ b/flang/runtime/stat.h
@@ -41,6 +41,7 @@ enum Stat {
StatLocked = FORTRAN_RUNTIME_STAT_LOCKED,
StatLockedOtherImage = FORTRAN_RUNTIME_STAT_LOCKED_OTHER_IMAGE,
StatMissingEnvVariable = FORTRAN_RUNTIME_STAT_MISSING_ENV_VAR,
+ StatMissingCurrentWorkDirectory = FORTRAN_RUNTIME_STAT_MISSING_CWD,
StatStoppedImage = FORTRAN_RUNTIME_STAT_STOPPED_IMAGE,
StatUnlocked = FORTRAN_RUNTIME_STAT_UNLOCKED,
StatUnlockedFailedImage = FORTRAN_RUNTIME_STAT_UNLOCKED_FAILED_IMAGE,
diff --git a/flang/test/Fir/loop03.fir b/flang/test/Fir/loop03.fir
new file mode 100644
index 000000000000..b88dcaf8639b
--- /dev/null
+++ b/flang/test/Fir/loop03.fir
@@ -0,0 +1,17 @@
+// Test the reduction semantics of fir.do_loop
+// RUN: fir-opt %s | FileCheck %s
+
+func.func @reduction() {
+ %bound = arith.constant 10 : index
+ %step = arith.constant 1 : index
+ %sum = fir.alloca i32
+// CHECK: %[[VAL_0:.*]] = fir.alloca i32
+// CHECK: fir.do_loop %[[VAL_1:.*]] = %[[VAL_2:.*]] to %[[VAL_3:.*]] step %[[VAL_4:.*]] unordered reduce(#fir.reduce_attr<add> -> %[[VAL_0]] : !fir.ref<i32>) {
+ fir.do_loop %iv = %step to %bound step %step unordered reduce(#fir.reduce_attr<add> -> %sum : !fir.ref<i32>) {
+ %index = fir.convert %iv : (index) -> i32
+ %1 = fir.load %sum : !fir.ref<i32>
+ %2 = arith.addi %index, %1 : i32
+ fir.store %2 to %sum : !fir.ref<i32>
+ }
+ return
+}
diff --git a/flang/test/Lower/Intrinsics/getcwd-function.f90 b/flang/test/Lower/Intrinsics/getcwd-function.f90
new file mode 100644
index 000000000000..50b64729294f
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/getcwd-function.f90
@@ -0,0 +1,23 @@
+! Test GETCWD called as a function (status returned as the function result).
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func.func @_QPtest(
+! CHECK-SAME: %[[cwdArg:.*]]: !fir.boxchar<1> {fir.bindc_name = "cwd"}) -> i32 {
+integer function test(cwd)
+ CHARACTER(len=255) :: cwd
+ test = getcwd(cwd)
+ ! CHECK-NEXT: %[[c8:.*]] = arith.constant 8 : i32
+ ! CHECK-NEXT: %[[c255:.*]] = arith.constant 255 : index
+ ! CHECK-NEXT: %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
+ ! CHECK-NEXT: %[[cwdUnbox:.*]]:2 = fir.unboxchar %[[cwdArg]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ ! CHECK-NEXT: %[[cwdCast:.*]] = fir.convert %[[cwdUnbox]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[cwdDeclare:.*]] = fir.declare %[[cwdCast]] typeparams %[[c255]] dummy_scope %[[DSCOPE]] {uniq_name = "_QFtestEcwd"} : (!fir.ref<!fir.char<1,255>>, index, !fir.dscope) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[test:.*]] = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFtestEtest"}
+ ! CHECK-NEXT: %[[testAddr:.*]] = fir.declare %[[test]] {uniq_name = "_QFtestEtest"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ ! CHECK-NEXT: %[[cwdBox:.*]] = fir.embox %[[cwdDeclare]] : (!fir.ref<!fir.char<1,255>>) -> !fir.box<!fir.char<1,255>>
+ ! CHECK: %[[cwd:.*]] = fir.convert %[[cwdBox]] : (!fir.box<!fir.char<1,255>>) -> !fir.box<none>
+ ! CHECK: %[[statusValue:.*]] = fir.call @_FortranAGetCwd(%[[cwd]], %[[VAL_9:.*]], %[[c8]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> i32
+ ! CHECK-NEXT: fir.store %[[statusValue]] to %[[testAddr]] : !fir.ref<i32>
+ ! CHECK-NEXT: %[[returnValue:.*]] = fir.load %[[testAddr]] : !fir.ref<i32>
+ ! CHECK-NEXT: return %[[returnValue]] : i32
+end function
diff --git a/flang/test/Lower/Intrinsics/getcwd-optional.f90 b/flang/test/Lower/Intrinsics/getcwd-optional.f90
new file mode 100644
index 000000000000..3e2a221f0c3f
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/getcwd-optional.f90
@@ -0,0 +1,29 @@
+! Test GETCWD with dynamically optional arguments.
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+
+! CHECK-LABEL: func.func @_QPtest(
+! CHECK-SAME: %[[cwdArg:.*]]: !fir.boxchar<1> {fir.bindc_name = "cwd"},
+! CHECK-SAME: %[[statusArg:.*]]: !fir.ref<i32> {fir.bindc_name = "status", fir.optional}) {
+subroutine test(cwd, status)
+ CHARACTER(len=255) :: cwd
+ INTEGER, OPTIONAL :: status
+ call getcwd(cwd, status)
+ ! CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : i64
+ ! CHECK-NEXT: %[[c11:.*]] = arith.constant 11 : i32
+ ! CHECK-NEXT: %[[c255:.*]] = arith.constant 255 : index
+ ! CHECK-NEXT: %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
+ ! CHECK-NEXT: %[[cwdUnbox:.*]]:2 = fir.unboxchar %[[cwdArg]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ ! CHECK-NEXT: %[[cwdCast:.*]] = fir.convert %[[cwdUnbox]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[cwdDeclare:.*]] = fir.declare %[[cwdCast]] typeparams %[[c255]] dummy_scope %[[DSCOPE]] {uniq_name = "_QFtestEcwd"} : (!fir.ref<!fir.char<1,255>>, index, !fir.dscope) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[statusAddr:.*]] = fir.declare %[[statusArg]] dummy_scope %[[DSCOPE]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFtestEstatus"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+ ! CHECK-NEXT: %[[cwdBox:.*]] = fir.embox %[[cwdDeclare]] : (!fir.ref<!fir.char<1,255>>) -> !fir.box<!fir.char<1,255>>
+ ! CHECK: %[[cwd:.*]] = fir.convert %[[cwdBox]] : (!fir.box<!fir.char<1,255>>) -> !fir.box<none>
+ ! CHECK: %[[statusValue:.*]] = fir.call @_FortranAGetCwd(%[[cwd]], %[[VAL_8:.*]], %[[c11]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> i32
+ ! CHECK-NEXT: %[[statusCast:.*]] = fir.convert %[[statusAddr]] : (!fir.ref<i32>) -> i64
+ ! CHECK-NEXT: %[[isPresent:.*]] = arith.cmpi ne, %[[statusCast]], %[[c0]] : i64
+ ! CHECK-NEXT: fir.if %[[isPresent]] {
+ ! CHECK-NEXT: fir.store %[[statusValue]] to %[[statusAddr]] : !fir.ref<i32>
+ ! CHECK-NEXT: }
+ ! CHECK-NEXT: return
+end subroutine
diff --git a/flang/test/Lower/Intrinsics/getcwd.f90 b/flang/test/Lower/Intrinsics/getcwd.f90
new file mode 100644
index 000000000000..fe207854aff0
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/getcwd.f90
@@ -0,0 +1,44 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func.func @_QPcwd_only(
+! CHECK-SAME: %[[cwdArg:.*]]: !fir.boxchar<1> {fir.bindc_name = "cwd"}) {
+subroutine cwd_only(cwd)
+ CHARACTER(len=255) :: cwd
+ call getcwd(cwd)
+ ! CHECK-NEXT: %[[c7:.*]] = arith.constant 7 : i32
+ ! CHECK-NEXT: %[[c255:.*]] = arith.constant 255 : index
+ ! CHECK-NEXT: %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
+ ! CHECK-NEXT: %[[cwdUnbox:.*]]:2 = fir.unboxchar %[[cwdArg]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ ! CHECK-NEXT: %[[cwdCast:.*]] = fir.convert %[[cwdUnbox]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[cwdDeclare:.*]] = fir.declare %[[cwdCast]] typeparams %[[c255]] dummy_scope %[[DSCOPE]] {uniq_name = "_QFcwd_onlyEcwd"} : (!fir.ref<!fir.char<1,255>>, index, !fir.dscope) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[cwdBox:.*]] = fir.embox %[[cwdDeclare]] : (!fir.ref<!fir.char<1,255>>) -> !fir.box<!fir.char<1,255>>
+ ! CHECK: %[[cwd:.*]] = fir.convert %[[cwdBox]] : (!fir.box<!fir.char<1,255>>) -> !fir.box<none>
+ ! CHECK: %[[statusValue:.*]] = fir.call @_FortranAGetCwd(%[[cwd]], %[[VAL_7:.*]], %[[c7]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> i32
+ ! CHECK-NEXT: return
+end subroutine cwd_only
+
+! CHECK-LABEL: func.func @_QPall_arguments(
+! CHECK-SAME: %[[cwdArg:.*]]: !fir.boxchar<1> {fir.bindc_name = "cwd"},
+! CHECK-SAME: %[[statusArg:.*]]: !fir.ref<i32> {fir.bindc_name = "status"}) {
+subroutine all_arguments(cwd, status)
+ CHARACTER(len=255) :: cwd
+ INTEGER :: status
+ call getcwd(cwd, status)
+ ! CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : i64
+ ! CHECK-NEXT: %[[c26:.*]] = arith.constant 26 : i32
+ ! CHECK-NEXT: %[[c255:.*]] = arith.constant 255 : index
+ ! CHECK-NEXT: %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope
+ ! CHECK-NEXT: %[[cwdUnbox:.*]]:2 = fir.unboxchar %[[cwdArg]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ ! CHECK-NEXT: %[[cwdCast:.*]] = fir.convert %[[cwdUnbox]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[cwdDeclare:.*]] = fir.declare %[[cwdCast]] typeparams %[[c255]] dummy_scope %[[DSCOPE]] {uniq_name = "_QFall_argumentsEcwd"} : (!fir.ref<!fir.char<1,255>>, index, !fir.dscope) -> !fir.ref<!fir.char<1,255>>
+ ! CHECK-NEXT: %[[statusAddr:.*]] = fir.declare %[[statusArg]] dummy_scope %0 {uniq_name = "_QFall_argumentsEstatus"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+ ! CHECK-NEXT: %[[cwdBox:.*]] = fir.embox %[[cwdDeclare]] : (!fir.ref<!fir.char<1,255>>) -> !fir.box<!fir.char<1,255>>
+ ! CHECK: %[[cwd:.*]] = fir.convert %[[cwdBox]] : (!fir.box<!fir.char<1,255>>) -> !fir.box<none>
+ ! CHECK: %[[statusValue:.*]] = fir.call @_FortranAGetCwd(%[[cwd]], %[[VAL_8:.*]], %[[c26]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> i32
+ ! CHECK-NEXT: %[[statusCast:.*]] = fir.convert %[[statusAddr]] : (!fir.ref<i32>) -> i64
+ ! CHECK-NEXT: %[[isPresent:.*]] = arith.cmpi ne, %[[statusCast]], %[[c0]] : i64
+ ! CHECK-NEXT: fir.if %[[isPresent]] {
+ ! CHECK-NEXT: fir.store %[[statusValue]] to %[[statusAddr]] : !fir.ref<i32>
+ ! CHECK-NEXT: }
+ ! CHECK-NEXT: return
+end subroutine all_arguments \ No newline at end of file
diff --git a/flang/test/Lower/OpenMP/critical.f90 b/flang/test/Lower/OpenMP/critical.f90
index d62c58b3081a..c52ae688811e 100644
--- a/flang/test/Lower/OpenMP/critical.f90
+++ b/flang/test/Lower/OpenMP/critical.f90
@@ -51,3 +51,27 @@ subroutine predetermined_privatization()
end do
!$omp end parallel do
end
+
+! https://github.com/llvm/llvm-project/issues/75767
+!CHECK-LABEL: func @_QPparallel_critical_privatization(
+subroutine parallel_critical_privatization()
+ integer :: i
+
+ !CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_critical_privatizationEi"}
+ !CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I]] {uniq_name = "_QFparallel_critical_privatizationEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ !CHECK: omp.parallel {
+ !CHECK: %[[PRIV_I:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFparallel_critical_privatizationEi"}
+ !CHECK: %[[PRIV_I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFparallel_critical_privatizationEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ !CHECK: %[[TEMP:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+ !CHECK: hlfir.assign %[[TEMP]] to %[[PRIV_I_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+ !$omp parallel default(firstprivate)
+ !CHECK: omp.critical {
+ !$omp critical
+ !CHECK: %[[C200:.*]] = arith.constant 200 : i32
+ !CHECK: hlfir.assign %[[C200]] to %[[PRIV_I_DECL]]#0 : i32, !fir.ref<i32>
+ i = 200
+ !CHECK: }
+ !$omp end critical
+ !CHECK: }
+ !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/map-component-ref.f90 b/flang/test/Lower/OpenMP/map-component-ref.f90
index 2c582667f38d..21b56ab303ac 100644
--- a/flang/test/Lower/OpenMP/map-component-ref.f90
+++ b/flang/test/Lower/OpenMP/map-component-ref.f90
@@ -1,21 +1,22 @@
! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s
-! CHECK: %[[V0:[0-9]+]] = fir.alloca !fir.type<_QFfooTt0{a0:i32,a1:i32}> {bindc_name = "a", uniq_name = "_QFfooEa"}
-! CHECK: %[[V1:[0-9]+]]:2 = hlfir.declare %[[V0]] {uniq_name = "_QFfooEa"} : (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) -> (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>, !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>)
-! CHECK: %[[V2:[0-9]+]] = hlfir.designate %[[V1]]#0{"a1"} : (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) -> !fir.ref<i32>
+! CHECK-LABEL: func.func @_QPfoo1
+! CHECK: %[[V0:[0-9]+]] = fir.alloca !fir.type<_QFfoo1Tt0{a0:i32,a1:i32}> {bindc_name = "a", uniq_name = "_QFfoo1Ea"}
+! CHECK: %[[V1:[0-9]+]]:2 = hlfir.declare %[[V0]] {uniq_name = "_QFfoo1Ea"} : (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) -> (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>, !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>)
+! CHECK: %[[V2:[0-9]+]] = hlfir.designate %[[V1]]#0{"a1"} : (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) -> !fir.ref<i32>
! CHECK: %[[V3:[0-9]+]] = omp.map.info var_ptr(%[[V2]] : !fir.ref<i32>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "a%a1"}
-! CHECK: %[[V4:[0-9]+]] = omp.map.info var_ptr(%[[V1]]#1 : !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>, !fir.type<_QFfooTt0{a0:i32,a1:i32}>) map_clauses(tofrom) capture(ByRef) members(%[[V3]] : [1] : !fir.ref<i32>) -> !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>> {name = "a", partial_map = true}
-! CHECK: omp.target map_entries(%[[V3]] -> %arg0, %[[V4]] -> %arg1 : !fir.ref<i32>, !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) {
-! CHECK: ^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>):
-! CHECK: %[[V5:[0-9]+]]:2 = hlfir.declare %arg1 {uniq_name = "_QFfooEa"} : (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) -> (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>, !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>)
+! CHECK: %[[V4:[0-9]+]] = omp.map.info var_ptr(%[[V1]]#1 : !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>, !fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>) map_clauses(tofrom) capture(ByRef) members(%[[V3]] : [1] : !fir.ref<i32>) -> !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>> {name = "a", partial_map = true}
+! CHECK: omp.target map_entries(%[[V3]] -> %arg0, %[[V4]] -> %arg1 : !fir.ref<i32>, !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) {
+! CHECK: ^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>):
+! CHECK: %[[V5:[0-9]+]]:2 = hlfir.declare %arg1 {uniq_name = "_QFfoo1Ea"} : (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) -> (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>, !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>)
! CHECK: %c0_i32 = arith.constant 0 : i32
-! CHECK: %[[V6:[0-9]+]] = hlfir.designate %[[V5]]#0{"a1"} : (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) -> !fir.ref<i32>
+! CHECK: %[[V6:[0-9]+]] = hlfir.designate %[[V5]]#0{"a1"} : (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) -> !fir.ref<i32>
! CHECK: hlfir.assign %c0_i32 to %[[V6]] : i32, !fir.ref<i32>
! CHECK: omp.terminator
! CHECK: }
-subroutine foo()
+subroutine foo1()
implicit none
type t0
@@ -29,3 +30,25 @@ subroutine foo()
!$omp end target
end
+
+! CHECK-LABEL: func.func @_QPfoo2
+! CHECK-DAG: omp.map.info var_ptr(%{{[0-9]+}} : {{.*}} map_clauses(to) capture(ByRef) bounds(%{{[0-9]+}}) -> {{.*}} {name = "t%b(1_8)%a(1)"}
+! CHECK-DAG: omp.map.info var_ptr(%{{[0-9]+}} : {{.*}} map_clauses(from) capture(ByRef) bounds(%{{[0-9]+}}) -> {{.*}} {name = "u%b(1_8)%a(1)"}
+subroutine foo2()
+ implicit none
+
+ type t0
+ integer :: a(10)
+ end type
+
+ type t1
+ type(t0) :: b(10)
+ end type
+
+ type(t1) :: t, u
+
+!$omp target map(to: t%b(1)%a(1)) map(from: u%b(1)%a(1))
+ t%b(1)%a(1) = u%b(1)%a(1)
+!$omp end target
+
+end
diff --git a/flang/test/Semantics/getcwd.f90 b/flang/test/Semantics/getcwd.f90
new file mode 100644
index 000000000000..b6ff16eeec5a
--- /dev/null
+++ b/flang/test/Semantics/getcwd.f90
@@ -0,0 +1,35 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic
+! Tests for the GETCWD intrinsic
+
+subroutine bad_kind_error(cwd, status)
+ CHARACTER(len=255) :: cwd
+ INTEGER(2) :: status
+ !ERROR: Actual argument for 'status=' has bad type or kind 'INTEGER(2)'
+ call getcwd(cwd, status)
+end subroutine bad_kind_error
+
+subroutine bad_args_error()
+ !ERROR: missing mandatory 'c=' argument
+ call getcwd()
+end subroutine bad_args_error
+
+subroutine bad_apply_form(cwd)
+ CHARACTER(len=255) :: cwd
+ INTEGER :: status
+ !Declaration of 'getcwd'
+ call getcwd(cwd, status)
+ !ERROR: Cannot call subroutine 'getcwd' like a function
+ status = getcwd(cwd)
+end subroutine bad_apply_form
+
+subroutine good_subroutine(cwd, status)
+ CHARACTER(len=255) :: cwd
+ INTEGER :: status
+ call getcwd(cwd, status)
+end subroutine good_subroutine
+
+subroutine good_function(cwd, status)
+ CHARACTER(len=255) :: cwd
+ INTEGER :: status
+ status = getcwd(cwd)
+end subroutine good_function \ No newline at end of file
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index b678350e9fcb..2217a696fc5d 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -1,3 +1,13 @@
+if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
+ set(extra_entrypoints
+ # stdio.h entrypoints
+ libc.src.stdio.sprintf
+ libc.src.stdio.snprintf
+ libc.src.stdio.vsprintf
+ libc.src.stdio.vsnprintf
+ )
+endif()
+
set(TARGET_LIBC_ENTRYPOINTS
# assert.h entrypoints
libc.src.assert.__assert_fail
@@ -175,6 +185,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.errno.errno
# stdio.h entrypoints
+ ${extra_entrypoints}
libc.src.stdio.feof
libc.src.stdio.ferror
libc.src.stdio.fseek
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 2a4f789925e8..33ecff813a1f 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -506,7 +506,15 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.fdimf16
libc.src.math.floorf16
libc.src.math.fmaxf16
+ libc.src.math.fmaximumf16
+ libc.src.math.fmaximum_magf16
+ libc.src.math.fmaximum_mag_numf16
+ libc.src.math.fmaximum_numf16
libc.src.math.fminf16
+ libc.src.math.fminimumf16
+ libc.src.math.fminimum_magf16
+ libc.src.math.fminimum_mag_numf16
+ libc.src.math.fminimum_numf16
libc.src.math.fromfpf16
libc.src.math.fromfpxf16
libc.src.math.llrintf16
@@ -514,6 +522,13 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.lrintf16
libc.src.math.lroundf16
libc.src.math.nearbyintf16
+ libc.src.math.nextafterf16
+ libc.src.math.nextdownf16
+ # Temporarily disable nexttowardf16 on aarch64 because the conversion
+ # between _Float16 and long double will crash clang-11. This is fixed in
+ # clang-12 and after: https://godbolt.org/z/8ceT9454c
+ # libc.src.math.nexttowardf16
+ libc.src.math.nextupf16
libc.src.math.rintf16
libc.src.math.roundf16
libc.src.math.roundevenf16
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index fe121820d6b6..e3ca544ae018 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -173,7 +173,6 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.stdlib.atoll
libc.src.stdlib.bsearch
libc.src.stdlib.div
- libc.src.stdlib.quick_exit
libc.src.stdlib.labs
libc.src.stdlib.ldiv
libc.src.stdlib.llabs
@@ -539,7 +538,15 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.fdimf16
libc.src.math.floorf16
libc.src.math.fmaxf16
+ libc.src.math.fmaximumf16
+ libc.src.math.fmaximum_magf16
+ libc.src.math.fmaximum_mag_numf16
+ libc.src.math.fmaximum_numf16
libc.src.math.fminf16
+ libc.src.math.fminimumf16
+ libc.src.math.fminimum_magf16
+ libc.src.math.fminimum_mag_numf16
+ libc.src.math.fminimum_numf16
libc.src.math.fromfpf16
libc.src.math.fromfpxf16
libc.src.math.llrintf16
@@ -547,6 +554,10 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.lrintf16
libc.src.math.lroundf16
libc.src.math.nearbyintf16
+ libc.src.math.nextafterf16
+ libc.src.math.nextdownf16
+ libc.src.math.nexttowardf16
+ libc.src.math.nextupf16
libc.src.math.rintf16
libc.src.math.roundf16
libc.src.math.roundevenf16
@@ -758,9 +769,11 @@ if(LLVM_LIBC_FULL_BUILD)
# stdlib.h entrypoints
libc.src.stdlib._Exit
libc.src.stdlib.abort
+ libc.src.stdlib.at_quick_exit
libc.src.stdlib.atexit
libc.src.stdlib.exit
libc.src.stdlib.getenv
+ libc.src.stdlib.quick_exit
# signal.h entrypoints
libc.src.signal.raise
diff --git a/libc/docs/c23.rst b/libc/docs/c23.rst
index 5bbb056ec5c7..71232cc004c7 100644
--- a/libc/docs/c23.rst
+++ b/libc/docs/c23.rst
@@ -59,15 +59,17 @@ Additions:
* ufromfp* |check|
* fromfpx* |check|
* ufromfpx* |check|
- * nextup*
- * nextdown*
+ * nextup* |check|
+ * nextdown* |check|
* canonicalize* |check|
- * fmaximum*
- * fminimum*
- * fmaximum_mag*
- * fminimum_mag*
- * fmaximum_mag_num*
- * fminimum_mag_num*
+ * fmaximum* |check|
+ * fminimum* |check|
+ * fmaximum_mag* |check|
+ * fminimum_mag* |check|
+ * fmaximum_num* |check|
+ * fminimum_num* |check|
+ * fmaximum_mag_num* |check|
+ * fminimum_mag_num* |check|
* fadd*
* fsub*
* fmul*
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index afadf7124565..b9507f0887cd 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -138,23 +138,23 @@ Basic Operations
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| fmax | |check| | |check| | |check| | |check| | |check| | 7.12.12.2 | F.10.9.2 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fmaximum | |check| | |check| | |check| | | |check| | 7.12.12.4 | F.10.9.4 |
+| fmaximum | |check| | |check| | |check| | |check| | |check| | 7.12.12.4 | F.10.9.4 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fmaximum_mag | |check| | |check| | |check| | | |check| | 7.12.12.6 | F.10.9.4 |
+| fmaximum_mag | |check| | |check| | |check| | |check| | |check| | 7.12.12.6 | F.10.9.4 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fmaximum_mag_num | |check| | |check| | |check| | | |check| | 7.12.12.10 | F.10.9.5 |
+| fmaximum_mag_num | |check| | |check| | |check| | |check| | |check| | 7.12.12.10 | F.10.9.5 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fmaximum_num | |check| | |check| | |check| | | |check| | 7.12.12.8 | F.10.9.5 |
+| fmaximum_num | |check| | |check| | |check| | |check| | |check| | 7.12.12.8 | F.10.9.5 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| fmin | |check| | |check| | |check| | |check| | |check| | 7.12.12.3 | F.10.9.3 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fminimum | |check| | |check| | |check| | | |check| | 7.12.12.5 | F.10.9.4 |
+| fminimum | |check| | |check| | |check| | |check| | |check| | 7.12.12.5 | F.10.9.4 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fminimum_mag | |check| | |check| | |check| | | |check| | 7.12.12.7 | F.10.9.4 |
+| fminimum_mag | |check| | |check| | |check| | |check| | |check| | 7.12.12.7 | F.10.9.4 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fminimum_mag_num | |check| | |check| | |check| | | |check| | 7.12.12.11 | F.10.9.5 |
+| fminimum_mag_num | |check| | |check| | |check| | |check| | |check| | 7.12.12.11 | F.10.9.5 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fminimum_num | |check| | |check| | |check| | | |check| | 7.12.12.9 | F.10.9.5 |
+| fminimum_num | |check| | |check| | |check| | |check| | |check| | 7.12.12.9 | F.10.9.5 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| fmod | |check| | |check| | |check| | | |check| | 7.12.10.1 | F.10.7.1 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
@@ -190,13 +190,13 @@ Basic Operations
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| nearbyint | |check| | |check| | |check| | |check| | |check| | 7.12.9.3 | F.10.6.3 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| nextafter | |check| | |check| | |check| | | |check| | 7.12.11.3 | F.10.8.3 |
+| nextafter | |check| | |check| | |check| | |check| | |check| | 7.12.11.3 | F.10.8.3 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| nextdown | |check| | |check| | |check| | | |check| | 7.12.11.6 | F.10.8.6 |
+| nextdown | |check| | |check| | |check| | |check| | |check| | 7.12.11.6 | F.10.8.6 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| nexttoward | |check| | |check| | |check| | | N/A | 7.12.11.4 | F.10.8.4 |
+| nexttoward | |check| | |check| | |check| | |check| | N/A | 7.12.11.4 | F.10.8.4 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| nextup | |check| | |check| | |check| | | |check| | 7.12.11.5 | F.10.8.5 |
+| nextup | |check| | |check| | |check| | |check| | |check| | 7.12.11.5 | F.10.8.5 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| remainder | |check| | |check| | |check| | | | 7.12.10.2 | F.10.7.2 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt
index 8e87642f6608..9b3373a0ca39 100644
--- a/libc/hdr/types/CMakeLists.txt
+++ b/libc/hdr/types/CMakeLists.txt
@@ -117,3 +117,12 @@ add_proxy_header_library(
libc.include.llvm-libc-types.pid_t
libc.include.sys_types
)
+
+add_proxy_header_library(
+ atexithandler_t
+ HDRS
+ atexithandler_t.h
+ FULL_BUILD_DEPENDS
+ libc.include.llvm-libc-types.atexithandler_t
+ libc.include.stdlib
+)
diff --git a/libc/hdr/types/atexithandler_t.h b/libc/hdr/types/atexithandler_t.h
new file mode 100644
index 000000000000..4275e4407367
--- /dev/null
+++ b/libc/hdr/types/atexithandler_t.h
@@ -0,0 +1,22 @@
+//===-- Definition of macros from atexithandler_t.h -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_ATEXITHANDLER_T_H
+#define LLVM_LIBC_HDR_ATEXITHANDLER_T_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/__atexithandler_t.h"
+
+#else // overlay mode
+
+#error "type not available in overlay mode"
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_ATEXITHANDLER_T_H
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 3098736963d3..9a436c8ae38d 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -427,41 +427,49 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"fmaximum", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fmaximumf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fmaximuml", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fmaximumf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fmaximumf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fmaximum_num", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fmaximum_numf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fmaximum_numl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fmaximum_numf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fmaximum_numf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fmaximum_mag", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fmaximum_magf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fmaximum_magl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fmaximum_magf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fmaximum_magf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fmaximum_mag_num", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fmaximum_mag_numf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fmaximum_mag_numl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fmaximum_mag_numf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fmaximum_mag_numf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fminimum", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fminimumf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fminimuml", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fminimumf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fminimumf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fminimum_num", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fminimum_numf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fmaximum_numl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fminimum_numf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fminimum_numf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fminimum_mag", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fminimum_magf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fminimum_magl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fminimum_magf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fminimum_magf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fminimum_mag_num", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fminimum_mag_numf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fminimum_mag_numl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"fminimum_mag_numf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"fminimum_mag_numf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"fma", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
@@ -634,20 +642,24 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"nextafterf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"nextafter", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"nextafterl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"nextafterf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"nextafterf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"nexttowardf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<LongDoubleType>]>,
FunctionSpec<"nexttoward", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<LongDoubleType>]>,
FunctionSpec<"nexttowardl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+  GuardedFunctionSpec<"nexttowardf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<LongDoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
FunctionSpec<"nextdown", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
FunctionSpec<"nextdownf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
FunctionSpec<"nextdownl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"nextdownf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"nextdownf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"nextup", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
FunctionSpec<"nextupf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
FunctionSpec<"nextupl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>]>,
+ GuardedFunctionSpec<"nextupf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"nextupf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
FunctionSpec<"powf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
@@ -1095,8 +1107,9 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"free", RetValSpec<VoidType>, [ArgSpec<VoidPtr>]>,
FunctionSpec<"_Exit", RetValSpec<NoReturn>, [ArgSpec<IntType>]>,
- FunctionSpec<"exit", RetValSpec<NoReturn>, [ArgSpec<IntType>]>,
+ FunctionSpec<"at_quick_exit", RetValSpec<IntType>, [ArgSpec<AtexitHandlerT>]>,
FunctionSpec<"atexit", RetValSpec<IntType>, [ArgSpec<AtexitHandlerT>]>,
+ FunctionSpec<"exit", RetValSpec<NoReturn>, [ArgSpec<IntType>]>,
FunctionSpec<"quick_exit", RetValSpec<NoReturn>, [ArgSpec<IntType>]>,
]
>;
diff --git a/libc/src/__support/fixedvector.h b/libc/src/__support/fixedvector.h
index 6aeb4d56363e..ddd0993a9527 100644
--- a/libc/src/__support/fixedvector.h
+++ b/libc/src/__support/fixedvector.h
@@ -24,6 +24,17 @@ template <typename T, size_t CAPACITY> class FixedVector {
public:
constexpr FixedVector() = default;
+ using iterator = typename cpp::array<T, CAPACITY>::iterator;
+ constexpr FixedVector(iterator begin, iterator end) {
+ for (; begin != end; ++begin)
+ push_back(*begin);
+ }
+
+ constexpr FixedVector(size_t count, const T &value) {
+ for (size_t i = 0; i < count; ++i)
+ push_back(value);
+ }
+
bool push_back(const T &obj) {
if (item_count == CAPACITY)
return false;
@@ -43,8 +54,14 @@ public:
return true;
}
+ T &operator[](size_t idx) { return store[idx]; }
+
+ const T &operator[](size_t idx) const { return store[idx]; }
+
bool empty() const { return item_count == 0; }
+ size_t size() const { return item_count; }
+
// Empties the store for all practical purposes.
void reset() { item_count = 0; }
@@ -64,7 +81,6 @@ public:
}
LIBC_INLINE constexpr reverse_iterator rend() { return store.rend(); }
- using iterator = typename cpp::array<T, CAPACITY>::iterator;
LIBC_INLINE constexpr iterator begin() { return store.begin(); }
LIBC_INLINE constexpr iterator end() { return iterator{&store[item_count]}; }
};
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 83ce322e8273..7a349ddc5372 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -135,41 +135,49 @@ add_math_entrypoint_object(fminf16)
add_math_entrypoint_object(fmaximum)
add_math_entrypoint_object(fmaximumf)
add_math_entrypoint_object(fmaximuml)
+add_math_entrypoint_object(fmaximumf16)
add_math_entrypoint_object(fmaximumf128)
add_math_entrypoint_object(fmaximum_num)
add_math_entrypoint_object(fmaximum_numf)
add_math_entrypoint_object(fmaximum_numl)
+add_math_entrypoint_object(fmaximum_numf16)
add_math_entrypoint_object(fmaximum_numf128)
add_math_entrypoint_object(fmaximum_mag)
add_math_entrypoint_object(fmaximum_magf)
add_math_entrypoint_object(fmaximum_magl)
+add_math_entrypoint_object(fmaximum_magf16)
add_math_entrypoint_object(fmaximum_magf128)
add_math_entrypoint_object(fmaximum_mag_num)
add_math_entrypoint_object(fmaximum_mag_numf)
add_math_entrypoint_object(fmaximum_mag_numl)
+add_math_entrypoint_object(fmaximum_mag_numf16)
add_math_entrypoint_object(fmaximum_mag_numf128)
add_math_entrypoint_object(fminimum)
add_math_entrypoint_object(fminimumf)
add_math_entrypoint_object(fminimuml)
+add_math_entrypoint_object(fminimumf16)
add_math_entrypoint_object(fminimumf128)
add_math_entrypoint_object(fminimum_num)
add_math_entrypoint_object(fminimum_numf)
add_math_entrypoint_object(fminimum_numl)
+add_math_entrypoint_object(fminimum_numf16)
add_math_entrypoint_object(fminimum_numf128)
add_math_entrypoint_object(fminimum_mag)
add_math_entrypoint_object(fminimum_magf)
add_math_entrypoint_object(fminimum_magl)
+add_math_entrypoint_object(fminimum_magf16)
add_math_entrypoint_object(fminimum_magf128)
add_math_entrypoint_object(fminimum_mag_num)
add_math_entrypoint_object(fminimum_mag_numf)
add_math_entrypoint_object(fminimum_mag_numl)
+add_math_entrypoint_object(fminimum_mag_numf16)
add_math_entrypoint_object(fminimum_mag_numf128)
add_math_entrypoint_object(fmod)
@@ -272,20 +280,24 @@ add_math_entrypoint_object(nearbyintf128)
add_math_entrypoint_object(nextafter)
add_math_entrypoint_object(nextafterf)
add_math_entrypoint_object(nextafterl)
+add_math_entrypoint_object(nextafterf16)
add_math_entrypoint_object(nextafterf128)
add_math_entrypoint_object(nexttoward)
add_math_entrypoint_object(nexttowardf)
add_math_entrypoint_object(nexttowardl)
+add_math_entrypoint_object(nexttowardf16)
add_math_entrypoint_object(nextdown)
add_math_entrypoint_object(nextdownf)
add_math_entrypoint_object(nextdownl)
+add_math_entrypoint_object(nextdownf16)
add_math_entrypoint_object(nextdownf128)
add_math_entrypoint_object(nextup)
add_math_entrypoint_object(nextupf)
add_math_entrypoint_object(nextupl)
+add_math_entrypoint_object(nextupf16)
add_math_entrypoint_object(nextupf128)
add_math_entrypoint_object(pow)
diff --git a/libc/src/math/fmaximum_mag_numf16.h b/libc/src/math/fmaximum_mag_numf16.h
new file mode 100644
index 000000000000..4c963d4dccc7
--- /dev/null
+++ b/libc/src/math/fmaximum_mag_numf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fmaximum_mag_numf16 -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMAXIMUM_MAG_NUMF16_H
+#define LLVM_LIBC_SRC_MATH_FMAXIMUM_MAG_NUMF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fmaximum_mag_numf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMAXIMUM_MAG_NUMF16_H
diff --git a/libc/src/math/fmaximum_magf16.h b/libc/src/math/fmaximum_magf16.h
new file mode 100644
index 000000000000..e5f57d3b7f1d
--- /dev/null
+++ b/libc/src/math/fmaximum_magf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fmaximum_magf16 ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMAXIMUM_MAGF16_H
+#define LLVM_LIBC_SRC_MATH_FMAXIMUM_MAGF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fmaximum_magf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMAXIMUM_MAGF16_H
diff --git a/libc/src/math/fmaximum_numf16.h b/libc/src/math/fmaximum_numf16.h
new file mode 100644
index 000000000000..b450a4595648
--- /dev/null
+++ b/libc/src/math/fmaximum_numf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fmaximum_numf16 ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMAXIMUM_NUMF16_H
+#define LLVM_LIBC_SRC_MATH_FMAXIMUM_NUMF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fmaximum_numf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMAXIMUM_NUMF16_H
diff --git a/libc/src/math/fmaximumf16.h b/libc/src/math/fmaximumf16.h
new file mode 100644
index 000000000000..806339fde683
--- /dev/null
+++ b/libc/src/math/fmaximumf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fmaximumf16 -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMAXIMUMF16_H
+#define LLVM_LIBC_SRC_MATH_FMAXIMUMF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fmaximumf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMAXIMUMF16_H
diff --git a/libc/src/math/fminimum_mag_numf16.h b/libc/src/math/fminimum_mag_numf16.h
new file mode 100644
index 000000000000..0fd314b2f5a2
--- /dev/null
+++ b/libc/src/math/fminimum_mag_numf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fminimum_mag_numf16 -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMINIMUM_MAG_NUMF16_H
+#define LLVM_LIBC_SRC_MATH_FMINIMUM_MAG_NUMF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fminimum_mag_numf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMINIMUM_MAG_NUMF16_H
diff --git a/libc/src/math/fminimum_magf16.h b/libc/src/math/fminimum_magf16.h
new file mode 100644
index 000000000000..27673555403c
--- /dev/null
+++ b/libc/src/math/fminimum_magf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fminimum_magf16 ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMINIMUM_MAGF16_H
+#define LLVM_LIBC_SRC_MATH_FMINIMUM_MAGF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fminimum_magf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMINIMUM_MAGF16_H
diff --git a/libc/src/math/fminimum_numf16.h b/libc/src/math/fminimum_numf16.h
new file mode 100644
index 000000000000..598ff9d3c32d
--- /dev/null
+++ b/libc/src/math/fminimum_numf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fminimum_numf16 ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMINIMUM_NUMF16_H
+#define LLVM_LIBC_SRC_MATH_FMINIMUM_NUMF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fminimum_numf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMINIMUM_NUMF16_H
diff --git a/libc/src/math/fminimumf16.h b/libc/src/math/fminimumf16.h
new file mode 100644
index 000000000000..86dd240ae406
--- /dev/null
+++ b/libc/src/math/fminimumf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fminimumf16 -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMINIMUMF16_H
+#define LLVM_LIBC_SRC_MATH_FMINIMUMF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fminimumf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMINIMUMF16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 00e2f0abc952..b1d786fc6b29 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1895,6 +1895,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fmaximumf16
+ SRCS
+ fmaximumf16.cpp
+ HDRS
+ ../fmaximumf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fmaximumf128
SRCS
fmaximumf128.cpp
@@ -1944,6 +1957,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fmaximum_numf16
+ SRCS
+ fmaximum_numf16.cpp
+ HDRS
+ ../fmaximum_numf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fmaximum_numf128
SRCS
fmaximum_numf128.cpp
@@ -1993,6 +2019,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fmaximum_magf16
+ SRCS
+ fmaximum_magf16.cpp
+ HDRS
+ ../fmaximum_magf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fmaximum_magf128
SRCS
fmaximum_magf128.cpp
@@ -2005,7 +2044,6 @@ add_entrypoint_object(
-O3
)
-
add_entrypoint_object(
fmaximum_mag_num
SRCS
@@ -2043,6 +2081,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fmaximum_mag_numf16
+ SRCS
+ fmaximum_mag_numf16.cpp
+ HDRS
+ ../fmaximum_mag_numf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fmaximum_mag_numf128
SRCS
fmaximum_mag_numf128.cpp
@@ -2092,6 +2143,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fminimumf16
+ SRCS
+ fminimumf16.cpp
+ HDRS
+ ../fminimumf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fminimumf128
SRCS
fminimumf128.cpp
@@ -2141,6 +2205,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fminimum_numf16
+ SRCS
+ fminimum_numf16.cpp
+ HDRS
+ ../fminimum_numf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fminimum_numf128
SRCS
fminimum_numf128.cpp
@@ -2190,6 +2267,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fminimum_magf16
+ SRCS
+ fminimum_magf16.cpp
+ HDRS
+ ../fminimum_magf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fminimum_magf128
SRCS
fminimum_magf128.cpp
@@ -2202,7 +2292,6 @@ add_entrypoint_object(
-O3
)
-
add_entrypoint_object(
fminimum_mag_num
SRCS
@@ -2240,6 +2329,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fminimum_mag_numf16
+ SRCS
+ fminimum_mag_numf16.cpp
+ HDRS
+ ../fminimum_mag_numf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.basic_operations
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
fminimum_mag_numf128
SRCS
fminimum_mag_numf128.cpp
@@ -2551,6 +2653,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ nextafterf16
+ SRCS
+ nextafterf16.cpp
+ HDRS
+ ../nextafterf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.manipulation_functions
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
nextafterf128
SRCS
nextafterf128.cpp
@@ -2600,6 +2715,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ nexttowardf16
+ SRCS
+ nexttowardf16.cpp
+ HDRS
+ ../nexttowardf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.manipulation_functions
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
nextdown
SRCS
nextdown.cpp
@@ -2636,6 +2764,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ nextdownf16
+ SRCS
+ nextdownf16.cpp
+ HDRS
+ ../nextdownf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.manipulation_functions
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
nextdownf128
SRCS
nextdownf128.cpp
@@ -2685,6 +2826,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ nextupf16
+ SRCS
+ nextupf16.cpp
+ HDRS
+ ../nextupf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.manipulation_functions
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
nextupf128
SRCS
nextupf128.cpp
diff --git a/libc/src/math/generic/fmaximum_mag_numf16.cpp b/libc/src/math/generic/fmaximum_mag_numf16.cpp
new file mode 100644
index 000000000000..5055802c4cf8
--- /dev/null
+++ b/libc/src/math/generic/fmaximum_mag_numf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fmaximum_mag_numf16 function --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fmaximum_mag_numf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fmaximum_mag_numf16, (float16 x, float16 y)) {
+ return fputil::fmaximum_mag_num(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fmaximum_magf16.cpp b/libc/src/math/generic/fmaximum_magf16.cpp
new file mode 100644
index 000000000000..fbd5eaccf309
--- /dev/null
+++ b/libc/src/math/generic/fmaximum_magf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fmaximum_magf16 function ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fmaximum_magf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fmaximum_magf16, (float16 x, float16 y)) {
+ return fputil::fmaximum_mag(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fmaximum_numf16.cpp b/libc/src/math/generic/fmaximum_numf16.cpp
new file mode 100644
index 000000000000..187cfbeee6e2
--- /dev/null
+++ b/libc/src/math/generic/fmaximum_numf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fmaximum_numf16 function ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fmaximum_numf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fmaximum_numf16, (float16 x, float16 y)) {
+ return fputil::fmaximum_num(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fmaximumf16.cpp b/libc/src/math/generic/fmaximumf16.cpp
new file mode 100644
index 000000000000..9e194d2ecef6
--- /dev/null
+++ b/libc/src/math/generic/fmaximumf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fmaximumf16 function ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fmaximumf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fmaximumf16, (float16 x, float16 y)) {
+ return fputil::fmaximum(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fminimum_mag_numf16.cpp b/libc/src/math/generic/fminimum_mag_numf16.cpp
new file mode 100644
index 000000000000..1a893c6c4bbc
--- /dev/null
+++ b/libc/src/math/generic/fminimum_mag_numf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fminimum_mag_numf16 function --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fminimum_mag_numf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fminimum_mag_numf16, (float16 x, float16 y)) {
+ return fputil::fminimum_mag_num(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fminimum_magf16.cpp b/libc/src/math/generic/fminimum_magf16.cpp
new file mode 100644
index 000000000000..45183a963e2d
--- /dev/null
+++ b/libc/src/math/generic/fminimum_magf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fminimum_magf16 function ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fminimum_magf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fminimum_magf16, (float16 x, float16 y)) {
+ return fputil::fminimum_mag(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fminimum_numf16.cpp b/libc/src/math/generic/fminimum_numf16.cpp
new file mode 100644
index 000000000000..825ad3e7b63a
--- /dev/null
+++ b/libc/src/math/generic/fminimum_numf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fminimum_numf16 function ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fminimum_numf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fminimum_numf16, (float16 x, float16 y)) {
+ return fputil::fminimum_num(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fminimumf16.cpp b/libc/src/math/generic/fminimumf16.cpp
new file mode 100644
index 000000000000..16f738be7e58
--- /dev/null
+++ b/libc/src/math/generic/fminimumf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fminimumf16 function ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fminimumf16.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fminimumf16, (float16 x, float16 y)) {
+ return fputil::fminimum(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nextafterf16.cpp b/libc/src/math/generic/nextafterf16.cpp
new file mode 100644
index 000000000000..144b3fc61461
--- /dev/null
+++ b/libc/src/math/generic/nextafterf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of nextafterf16 function ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nextafterf16.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, nextafterf16, (float16 x, float16 y)) {
+ return fputil::nextafter(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nextdownf16.cpp b/libc/src/math/generic/nextdownf16.cpp
new file mode 100644
index 000000000000..9fdaa9dafdd8
--- /dev/null
+++ b/libc/src/math/generic/nextdownf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of nextdownf16 function ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nextdownf16.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, nextdownf16, (float16 x)) {
+ return fputil::nextupdown</*IsDown=*/true>(x);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nexttowardf16.cpp b/libc/src/math/generic/nexttowardf16.cpp
new file mode 100644
index 000000000000..d1d78e8f22d3
--- /dev/null
+++ b/libc/src/math/generic/nexttowardf16.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of nexttowardf16 function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nexttowardf16.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, nexttowardf16, (float16 x, long double y)) {
+ // We can reuse the nextafter implementation because the internal nextafter is
+ // templated on the types of the arguments.
+ return fputil::nextafter(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/nextupf16.cpp b/libc/src/math/generic/nextupf16.cpp
new file mode 100644
index 000000000000..5d3d52c94068
--- /dev/null
+++ b/libc/src/math/generic/nextupf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of nextupf16 function ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nextupf16.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, nextupf16, (float16 x)) {
+ return fputil::nextupdown</*IsDown=*/false>(x);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/nextafterf16.h b/libc/src/math/nextafterf16.h
new file mode 100644
index 000000000000..293569ef40c5
--- /dev/null
+++ b/libc/src/math/nextafterf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for nextafterf16 ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTAFTERF16_H
+#define LLVM_LIBC_SRC_MATH_NEXTAFTERF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 nextafterf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTAFTERF16_H
diff --git a/libc/src/math/nextdownf16.h b/libc/src/math/nextdownf16.h
new file mode 100644
index 000000000000..19137574ac92
--- /dev/null
+++ b/libc/src/math/nextdownf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for nextdownf16 -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTDOWNF16_H
+#define LLVM_LIBC_SRC_MATH_NEXTDOWNF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 nextdownf16(float16 x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTDOWNF16_H
diff --git a/libc/src/math/nexttowardf16.h b/libc/src/math/nexttowardf16.h
new file mode 100644
index 000000000000..604eb32c2577
--- /dev/null
+++ b/libc/src/math/nexttowardf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for nexttowardf16 -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTTOWARDF16_H
+#define LLVM_LIBC_SRC_MATH_NEXTTOWARDF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 nexttowardf16(float16 x, long double y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTTOWARDF16_H
diff --git a/libc/src/math/nextupf16.h b/libc/src/math/nextupf16.h
new file mode 100644
index 000000000000..b2973e4afc25
--- /dev/null
+++ b/libc/src/math/nextupf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for nextupf16 ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEXTUPF16_H
+#define LLVM_LIBC_SRC_MATH_NEXTUPF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 nextupf16(float16 x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEXTUPF16_H
diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index e0bff5198b59..f0091ad367c0 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -50,6 +50,7 @@ add_entrypoint_object(
quick_exit.h
DEPENDS
libc.src.__support.OSUtil.osutil
+ .exit_handler
)
add_entrypoint_object(
@@ -415,14 +416,17 @@ add_entrypoint_object(
libc.src.__support.OSUtil.osutil
)
-add_entrypoint_object(
- atexit
+# TODO: Move all exit functions to Linux-specific directories
+
+if (TARGET libc.src.__support.threads.mutex)
+add_object_library(
+ exit_handler
SRCS
- atexit.cpp
+ exit_handler.cpp
HDRS
- atexit.h
+ exit_handler.h
CXX_STANDARD
- 20 # For constinit of the atexit callback list.
+ 20 # For constinit
DEPENDS
libc.src.__support.CPP.mutex
libc.src.__support.CPP.new
@@ -431,6 +435,27 @@ add_entrypoint_object(
libc.src.__support.fixedvector
libc.src.__support.threads.mutex
)
+endif()
+
+add_entrypoint_object(
+ atexit
+ SRCS
+ atexit.cpp
+ HDRS
+ atexit.h
+ DEPENDS
+ .exit_handler
+)
+
+add_entrypoint_object(
+ at_quick_exit
+ SRCS
+ at_quick_exit.cpp
+ HDRS
+ at_quick_exit.h
+ DEPENDS
+ .exit_handler
+)
add_entrypoint_object(
exit
@@ -442,6 +467,7 @@ add_entrypoint_object(
._Exit
.atexit
libc.src.__support.OSUtil.osutil
+ .exit_handler
)
add_entrypoint_object(
diff --git a/libc/src/stdlib/at_quick_exit.cpp b/libc/src/stdlib/at_quick_exit.cpp
new file mode 100644
index 000000000000..752d67e7fe44
--- /dev/null
+++ b/libc/src/stdlib/at_quick_exit.cpp
@@ -0,0 +1,22 @@
+//===-- Implementation of at_quick_exit -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/at_quick_exit.h"
+#include "hdr/types/atexithandler_t.h"
+#include "src/__support/common.h"
+#include "src/stdlib/exit_handler.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(int, at_quick_exit, (__atexithandler_t callback)) {
+ return add_atexit_unit(
+ at_quick_exit_callbacks,
+ {&stdc_at_exit_func, reinterpret_cast<void *>(callback)});
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/at_quick_exit.h b/libc/src/stdlib/at_quick_exit.h
new file mode 100644
index 000000000000..c36c797088ab
--- /dev/null
+++ b/libc/src/stdlib/at_quick_exit.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for at_quick_exit -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_AT_QUICK_EXIT_H
+#define LLVM_LIBC_SRC_STDLIB_AT_QUICK_EXIT_H
+
+#include "hdr/types/atexithandler_t.h"
+
+namespace LIBC_NAMESPACE {
+
+int at_quick_exit(__atexithandler_t);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STDLIB_AT_QUICK_EXIT_H
diff --git a/libc/src/stdlib/atexit.cpp b/libc/src/stdlib/atexit.cpp
index 9e37c4cf256c..ca3cbfe87a88 100644
--- a/libc/src/stdlib/atexit.cpp
+++ b/libc/src/stdlib/atexit.cpp
@@ -7,95 +7,28 @@
//===----------------------------------------------------------------------===//
#include "src/stdlib/atexit.h"
-#include "src/__support/CPP/mutex.h" // lock_guard
-#include "src/__support/blockstore.h"
+#include "hdr/types/atexithandler_t.h"
#include "src/__support/common.h"
-#include "src/__support/fixedvector.h"
-#include "src/__support/threads/mutex.h"
+#include "src/stdlib/exit_handler.h"
namespace LIBC_NAMESPACE {
-namespace {
-
-Mutex handler_list_mtx(/*timed=*/false, /*recursive=*/false, /*robust=*/false,
- /*pshared=*/false);
-
-using AtExitCallback = void(void *);
-using StdCAtExitCallback = void(void);
-
-struct AtExitUnit {
- AtExitCallback *callback = nullptr;
- void *payload = nullptr;
- constexpr AtExitUnit() = default;
- constexpr AtExitUnit(AtExitCallback *c, void *p) : callback(c), payload(p) {}
-};
-
-#if defined(LIBC_TARGET_ARCH_IS_GPU)
-// The GPU build cannot handle the potentially recursive definitions required by
-// the BlockStore class. Additionally, the liklihood that someone exceeds this
-// while executing on the GPU is extremely small.
-// FIXME: It is not generally safe to use 'atexit' on the GPU because the
-// mutexes simply passthrough. We will need a lock free stack.
-using ExitCallbackList = FixedVector<AtExitUnit, 64>;
-#elif defined(LIBC_COPT_PUBLIC_PACKAGING)
-using ExitCallbackList = ReverseOrderBlockStore<AtExitUnit, 32>;
-#else
-// BlockStore uses dynamic memory allocation. To avoid dynamic memory
-// allocation in tests, we use a fixed size callback list when built for
-// tests.
-// If we use BlockStore, then we will have to pull in malloc etc into
-// the tests. While this is not bad, the problem we have currently is
-// that LLVM libc' allocator is SCUDO. So, we will end up pulling SCUDO's
-// deps also (some of which are not yet available in LLVM libc) into the
-// integration tests.
-using ExitCallbackList = FixedVector<AtExitUnit, CALLBACK_LIST_SIZE_FOR_TESTS>;
-#endif // LIBC_COPT_PUBLIC_PACKAGING
-
-constinit ExitCallbackList exit_callbacks;
-
-void stdc_at_exit_func(void *payload) {
- reinterpret_cast<StdCAtExitCallback *>(payload)();
-}
-
-void call_exit_callbacks() {
- handler_list_mtx.lock();
- while (!exit_callbacks.empty()) {
- AtExitUnit &unit = exit_callbacks.back();
- exit_callbacks.pop_back();
- handler_list_mtx.unlock();
- unit.callback(unit.payload);
- handler_list_mtx.lock();
- }
- ExitCallbackList::destroy(&exit_callbacks);
-}
-
-int add_atexit_unit(const AtExitUnit &unit) {
- cpp::lock_guard lock(handler_list_mtx);
- if (exit_callbacks.push_back(unit))
- return 0;
- return -1;
-}
-
-} // namespace
-
extern "C" {
-// TODO: Handle the last dso handle argument.
int __cxa_atexit(AtExitCallback *callback, void *payload, void *) {
- return add_atexit_unit({callback, payload});
+ return add_atexit_unit(atexit_callbacks, {callback, payload});
}
-// TODO: Handle the dso handle argument. call_exit_callbacks should only invoke
-// the callbacks from this DSO. Requires adding support for __dso_handle.
void __cxa_finalize(void *dso) {
if (!dso)
- call_exit_callbacks();
+ call_exit_callbacks(atexit_callbacks);
}
} // extern "C"
-LLVM_LIBC_FUNCTION(int, atexit, (StdCAtExitCallback * callback)) {
+LLVM_LIBC_FUNCTION(int, atexit, (__atexithandler_t callback)) {
return add_atexit_unit(
+ atexit_callbacks,
{&stdc_at_exit_func, reinterpret_cast<void *>(callback)});
}
diff --git a/libc/src/stdlib/atexit.h b/libc/src/stdlib/atexit.h
index 7cf9d7c92191..7faaf654247c 100644
--- a/libc/src/stdlib/atexit.h
+++ b/libc/src/stdlib/atexit.h
@@ -9,13 +9,10 @@
#ifndef LLVM_LIBC_SRC_STDLIB_ATEXIT_H
#define LLVM_LIBC_SRC_STDLIB_ATEXIT_H
-#include <stddef.h> // For size_t
-
+#include "hdr/types/atexithandler_t.h"
namespace LIBC_NAMESPACE {
-constexpr size_t CALLBACK_LIST_SIZE_FOR_TESTS = 1024;
-
-int atexit(void (*function)());
+int atexit(__atexithandler_t);
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/exit_handler.cpp b/libc/src/stdlib/exit_handler.cpp
new file mode 100644
index 000000000000..ed41247e4a31
--- /dev/null
+++ b/libc/src/stdlib/exit_handler.cpp
@@ -0,0 +1,42 @@
+//===--- Implementation of exit_handler -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/exit_handler.h"
+#include "src/__support/CPP/mutex.h" // lock_guard
+
+namespace LIBC_NAMESPACE {
+
+constinit ExitCallbackList at_quick_exit_callbacks;
+constinit ExitCallbackList atexit_callbacks;
+
+Mutex handler_list_mtx(false, false, false, false);
+
+void stdc_at_exit_func(void *payload) {
+ reinterpret_cast<StdCAtExitCallback *>(payload)();
+}
+
+void call_exit_callbacks(ExitCallbackList &callbacks) {
+ handler_list_mtx.lock();
+ while (!callbacks.empty()) {
+ AtExitUnit &unit = callbacks.back();
+ callbacks.pop_back();
+ handler_list_mtx.unlock();
+ unit.callback(unit.payload);
+ handler_list_mtx.lock();
+ }
+ ExitCallbackList::destroy(&callbacks);
+}
+
+int add_atexit_unit(ExitCallbackList &callbacks, const AtExitUnit &unit) {
+ cpp::lock_guard lock(handler_list_mtx);
+ if (callbacks.push_back(unit))
+ return 0;
+ return -1;
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/exit_handler.h b/libc/src/stdlib/exit_handler.h
new file mode 100644
index 000000000000..8494c2f2e526
--- /dev/null
+++ b/libc/src/stdlib/exit_handler.h
@@ -0,0 +1,53 @@
+//===-- Implementation header for exit_handler ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_EXIT_HANDLER_H
+#define LLVM_LIBC_SRC_STDLIB_EXIT_HANDLER_H
+
+#include "src/__support/CPP/mutex.h" // lock_guard
+#include "src/__support/blockstore.h"
+#include "src/__support/common.h"
+#include "src/__support/fixedvector.h"
+#include "src/__support/threads/mutex.h"
+
+namespace LIBC_NAMESPACE {
+
+using AtExitCallback = void(void *);
+using StdCAtExitCallback = void(void);
+constexpr size_t CALLBACK_LIST_SIZE_FOR_TESTS = 1024;
+
+struct AtExitUnit {
+ AtExitCallback *callback = nullptr;
+ void *payload = nullptr;
+ LIBC_INLINE constexpr AtExitUnit() = default;
+ LIBC_INLINE constexpr AtExitUnit(AtExitCallback *c, void *p)
+ : callback(c), payload(p) {}
+};
+
+#if defined(LIBC_TARGET_ARCH_IS_GPU)
+using ExitCallbackList = FixedVector<AtExitUnit, 64>;
+#elif defined(LIBC_COPT_PUBLIC_PACKAGING)
+using ExitCallbackList = ReverseOrderBlockStore<AtExitUnit, 32>;
+#else
+using ExitCallbackList = FixedVector<AtExitUnit, CALLBACK_LIST_SIZE_FOR_TESTS>;
+#endif
+
+extern ExitCallbackList atexit_callbacks;
+extern ExitCallbackList at_quick_exit_callbacks;
+
+extern Mutex handler_list_mtx;
+
+void stdc_at_exit_func(void *payload);
+
+void call_exit_callbacks(ExitCallbackList &callbacks);
+
+int add_atexit_unit(ExitCallbackList &callbacks, const AtExitUnit &unit);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STDLIB_EXIT_HANDLER_H
diff --git a/libc/src/stdlib/quick_exit.cpp b/libc/src/stdlib/quick_exit.cpp
index cf7f07bf2439..38f0a3db3e2c 100644
--- a/libc/src/stdlib/quick_exit.cpp
+++ b/libc/src/stdlib/quick_exit.cpp
@@ -9,13 +9,15 @@
#include "src/stdlib/quick_exit.h"
#include "src/__support/OSUtil/exit.h"
#include "src/__support/common.h"
+#include "src/stdlib/exit_handler.h"
// extern "C" void __cxa_finalize(void *);
-
namespace LIBC_NAMESPACE {
+extern ExitCallbackList at_quick_exit_callbacks;
+
[[noreturn]] LLVM_LIBC_FUNCTION(void, quick_exit, (int status)) {
- // __cxa_finalize(nullptr);
+ call_exit_callbacks(at_quick_exit_callbacks);
internal::exit(status);
}
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 663aa2bb82ca..d05377eca8a8 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -86,8 +86,8 @@ add_libc_test(
libc.src.__support.uint128
)
-# The GPU does not support varargs currently.
-if(NOT LIBC_TARGET_OS_IS_GPU)
+# NVPTX does not support varargs currently.
+if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
add_libc_test(
arg_list_test
SUITE
@@ -132,6 +132,7 @@ add_libc_test(
SRCS
fixedvector_test.cpp
DEPENDS
+ libc.src.__support.CPP.array
libc.src.__support.fixedvector
)
diff --git a/libc/test/src/__support/fixedvector_test.cpp b/libc/test/src/__support/fixedvector_test.cpp
index e9ffdd0203c2..212e1aed20f7 100644
--- a/libc/test/src/__support/fixedvector_test.cpp
+++ b/libc/test/src/__support/fixedvector_test.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/CPP/array.h"
#include "src/__support/fixedvector.h"
#include "test/UnitTest/Test.h"
@@ -69,3 +70,29 @@ TEST(LlvmLibcFixedVectorTest, Iteration) {
for (int &x : v)
ASSERT_GE(x, 0);
}
+
+TEST(LlvmLibcFixedVectorTest, ConstructionFromIterators) {
+ LIBC_NAMESPACE::cpp::array<int, 4> arr{1, 2, 3, 4};
+ LIBC_NAMESPACE::FixedVector<int, 5> vec(arr.begin(), arr.end());
+ ASSERT_EQ(vec.size(), arr.size());
+ for (size_t i = 0; i < arr.size(); ++i)
+ ASSERT_EQ(vec[i], arr[i]);
+}
+
+TEST(LlvmLibcFixedVectorTest, ConstructionFromCountAndValue) {
+ constexpr int kVal = 10;
+ LIBC_NAMESPACE::FixedVector<int, 5> vec(4, kVal);
+ ASSERT_EQ(vec.size(), size_t(4));
+ for (size_t i = 0; i < vec.size(); ++i)
+ ASSERT_EQ(vec[i], kVal);
+}
+
+TEST(LlvmLibcFixedVectorTest, ForwardIteration) {
+ LIBC_NAMESPACE::cpp::array<int, 4> arr{1, 2, 3, 4};
+ LIBC_NAMESPACE::FixedVector<int, 5> vec(arr.begin(), arr.end());
+ ASSERT_EQ(vec.size(), arr.size());
+ for (auto it = vec.begin(); it != vec.end(); ++it) {
+ auto idx = it - vec.begin();
+ ASSERT_EQ(*it, arr[idx]);
+ }
+}
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index a331c02a12ea..110fa1de97d6 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -1705,6 +1705,7 @@ add_fp_unittest(
FMinTest.h
DEPENDS
libc.src.math.fminf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1718,6 +1719,7 @@ add_fp_unittest(
FMinTest.h
DEPENDS
libc.src.math.fmin
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1731,6 +1733,7 @@ add_fp_unittest(
FMinTest.h
DEPENDS
libc.src.math.fminl
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1744,6 +1747,7 @@ add_fp_unittest(
FMinTest.h
DEPENDS
libc.src.math.fminf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1757,6 +1761,7 @@ add_fp_unittest(
FMinTest.h
DEPENDS
libc.src.math.fminf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1770,6 +1775,7 @@ add_fp_unittest(
FMaxTest.h
DEPENDS
libc.src.math.fmaxf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1783,6 +1789,7 @@ add_fp_unittest(
FMaxTest.h
DEPENDS
libc.src.math.fmax
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1796,6 +1803,7 @@ add_fp_unittest(
FMaxTest.h
DEPENDS
libc.src.math.fmaxl
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1809,6 +1817,7 @@ add_fp_unittest(
FMaxTest.h
DEPENDS
libc.src.math.fmaxf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1822,6 +1831,7 @@ add_fp_unittest(
FMaxTest.h
DEPENDS
libc.src.math.fmaxf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1835,6 +1845,21 @@ add_fp_unittest(
FMaximumTest.h
DEPENDS
libc.src.math.fmaximuml
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fmaximumf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fmaximumf16_test.cpp
+ HDRS
+ FMaximumTest.h
+ DEPENDS
+ libc.src.math.fmaximumf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1848,6 +1873,7 @@ add_fp_unittest(
FMaximumTest.h
DEPENDS
libc.src.math.fmaximumf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1861,6 +1887,7 @@ add_fp_unittest(
FMaximumTest.h
DEPENDS
libc.src.math.fmaximum
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1874,6 +1901,7 @@ add_fp_unittest(
FMaximumTest.h
DEPENDS
libc.src.math.fmaximumf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1887,6 +1915,7 @@ add_fp_unittest(
FMaximumNumTest.h
DEPENDS
libc.src.math.fmaximum_numf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1900,6 +1929,7 @@ add_fp_unittest(
FMaximumNumTest.h
DEPENDS
libc.src.math.fmaximum_num
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1913,6 +1943,21 @@ add_fp_unittest(
FMaximumNumTest.h
DEPENDS
libc.src.math.fmaximum_numl
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fmaximum_numf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fmaximum_numf16_test.cpp
+ HDRS
+ FMaximumNumTest.h
+ DEPENDS
+ libc.src.math.fmaximum_numf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1926,6 +1971,7 @@ add_fp_unittest(
FMaximumNumTest.h
DEPENDS
libc.src.math.fmaximum_numf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -1939,6 +1985,8 @@ add_fp_unittest(
FMaximumMagTest.h
DEPENDS
libc.src.math.fmaximum_magf
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -1952,6 +2000,8 @@ add_fp_unittest(
FMaximumMagTest.h
DEPENDS
libc.src.math.fmaximum_mag
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -1965,6 +2015,23 @@ add_fp_unittest(
FMaximumMagTest.h
DEPENDS
libc.src.math.fmaximum_magl
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fmaximum_magf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fmaximum_magf16_test.cpp
+ HDRS
+ FMaximumMagTest.h
+ DEPENDS
+ libc.src.math.fmaximum_magf16
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -1978,10 +2045,11 @@ add_fp_unittest(
FMaximumMagTest.h
DEPENDS
libc.src.math.fmaximum_magf128
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
-
add_fp_unittest(
fmaximum_mag_numf_test
SUITE
@@ -1992,6 +2060,7 @@ add_fp_unittest(
FMaximumMagNumTest.h
DEPENDS
libc.src.math.fmaximum_mag_numf
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -2005,6 +2074,7 @@ add_fp_unittest(
FMaximumMagNumTest.h
DEPENDS
libc.src.math.fmaximum_mag_num
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -2018,6 +2088,21 @@ add_fp_unittest(
FMaximumMagNumTest.h
DEPENDS
libc.src.math.fmaximum_mag_numl
+ libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fmaximum_mag_numf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fmaximum_mag_numf16_test.cpp
+ HDRS
+ FMaximumMagNumTest.h
+ DEPENDS
+ libc.src.math.fmaximum_mag_numf16
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -2031,6 +2116,7 @@ add_fp_unittest(
FMaximumMagNumTest.h
DEPENDS
libc.src.math.fmaximum_mag_numf128
+ libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fp_bits
)
@@ -2044,6 +2130,21 @@ add_fp_unittest(
FMinimumTest.h
DEPENDS
libc.src.math.fminimuml
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fminimumf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fminimumf16_test.cpp
+ HDRS
+ FMinimumTest.h
+ DEPENDS
+ libc.src.math.fminimumf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2057,6 +2158,7 @@ add_fp_unittest(
FMinimumTest.h
DEPENDS
libc.src.math.fminimumf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2070,6 +2172,7 @@ add_fp_unittest(
FMinimumTest.h
DEPENDS
libc.src.math.fminimum
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2083,6 +2186,7 @@ add_fp_unittest(
FMinimumTest.h
DEPENDS
libc.src.math.fminimumf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2096,6 +2200,7 @@ add_fp_unittest(
FMinimumNumTest.h
DEPENDS
libc.src.math.fminimum_numf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2109,6 +2214,7 @@ add_fp_unittest(
FMinimumNumTest.h
DEPENDS
libc.src.math.fminimum_num
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2122,6 +2228,21 @@ add_fp_unittest(
FMinimumNumTest.h
DEPENDS
libc.src.math.fminimum_numl
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fminimum_numf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fminimum_numf16_test.cpp
+ HDRS
+ FMinimumNumTest.h
+ DEPENDS
+ libc.src.math.fminimum_numf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2135,6 +2256,7 @@ add_fp_unittest(
FMinimumNumTest.h
DEPENDS
libc.src.math.fminimum_numf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2148,6 +2270,7 @@ add_fp_unittest(
FMinimumMagTest.h
DEPENDS
libc.src.math.fminimum_magf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2161,6 +2284,7 @@ add_fp_unittest(
FMinimumMagTest.h
DEPENDS
libc.src.math.fminimum_mag
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2174,6 +2298,21 @@ add_fp_unittest(
FMinimumMagTest.h
DEPENDS
libc.src.math.fminimum_magl
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fminimum_magf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fminimum_magf16_test.cpp
+ HDRS
+ FMinimumMagTest.h
+ DEPENDS
+ libc.src.math.fminimum_magf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2187,10 +2326,10 @@ add_fp_unittest(
FMinimumMagTest.h
DEPENDS
libc.src.math.fminimum_magf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
-
add_fp_unittest(
fminimum_mag_numf_test
SUITE
@@ -2201,6 +2340,7 @@ add_fp_unittest(
FMinimumMagNumTest.h
DEPENDS
libc.src.math.fminimum_mag_numf
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2214,6 +2354,7 @@ add_fp_unittest(
FMinimumMagNumTest.h
DEPENDS
libc.src.math.fminimum_mag_num
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2227,6 +2368,21 @@ add_fp_unittest(
FMinimumMagNumTest.h
DEPENDS
libc.src.math.fminimum_mag_numl
+ libc.src.__support.CPP.algorithm
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ fminimum_mag_numf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ fminimum_mag_numf16_test.cpp
+ HDRS
+ FMinimumMagNumTest.h
+ DEPENDS
+ libc.src.math.fminimum_mag_numf16
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2240,6 +2396,7 @@ add_fp_unittest(
FMinimumMagNumTest.h
DEPENDS
libc.src.math.fminimum_mag_numf128
+ libc.src.__support.CPP.algorithm
libc.src.__support.FPUtil.fp_bits
)
@@ -2541,8 +2698,10 @@ add_fp_unittest(
HDRS
NextAfterTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nextafter
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
@@ -2555,8 +2714,10 @@ add_fp_unittest(
HDRS
NextAfterTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nextafterf
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
@@ -2569,8 +2730,26 @@ add_fp_unittest(
HDRS
NextAfterTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nextafterl
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ nextafterf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ nextafterf16_test.cpp
+ HDRS
+ NextAfterTest.h
+ DEPENDS
+ libc.hdr.fenv_macros
+ libc.src.math.nextafterf16
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
@@ -2583,8 +2762,10 @@ add_fp_unittest(
HDRS
NextAfterTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nextafterf128
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
@@ -2599,8 +2780,10 @@ if(NOT LIBC_TARGET_OS_IS_GPU)
HDRS
NextTowardTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nexttoward
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
@@ -2613,8 +2796,10 @@ if(NOT LIBC_TARGET_OS_IS_GPU)
HDRS
NextTowardTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nexttowardf
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
endif()
@@ -2628,8 +2813,26 @@ add_fp_unittest(
HDRS
NextTowardTest.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.nexttowardl
- libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+)
+
+add_fp_unittest(
+ nexttowardf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ nexttowardf16_test.cpp
+ HDRS
+ NextTowardTest.h
+ DEPENDS
+ libc.hdr.fenv_macros
+ libc.src.math.nexttowardf16
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
)
@@ -2643,7 +2846,6 @@ add_fp_unittest(
NextDownTest.h
DEPENDS
libc.src.math.nextdown
- libc.src.__support.FPUtil.manipulation_functions
)
add_fp_unittest(
@@ -2656,7 +2858,6 @@ add_fp_unittest(
NextDownTest.h
DEPENDS
libc.src.math.nextdownf
- libc.src.__support.FPUtil.manipulation_functions
)
add_fp_unittest(
@@ -2669,7 +2870,18 @@ add_fp_unittest(
NextDownTest.h
DEPENDS
libc.src.math.nextdownl
- libc.src.__support.FPUtil.manipulation_functions
+)
+
+add_fp_unittest(
+ nextdownf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ nextdownf16_test.cpp
+ HDRS
+ NextDownTest.h
+ DEPENDS
+ libc.src.math.nextdownf16
)
add_fp_unittest(
@@ -2682,7 +2894,6 @@ add_fp_unittest(
NextDownTest.h
DEPENDS
libc.src.math.nextdownf128
- libc.src.__support.FPUtil.manipulation_functions
)
add_fp_unittest(
@@ -2695,7 +2906,6 @@ add_fp_unittest(
NextUpTest.h
DEPENDS
libc.src.math.nextup
- libc.src.__support.FPUtil.manipulation_functions
)
add_fp_unittest(
@@ -2708,7 +2918,6 @@ add_fp_unittest(
NextUpTest.h
DEPENDS
libc.src.math.nextupf
- libc.src.__support.FPUtil.manipulation_functions
)
add_fp_unittest(
@@ -2721,7 +2930,18 @@ add_fp_unittest(
NextUpTest.h
DEPENDS
libc.src.math.nextupl
- libc.src.__support.FPUtil.manipulation_functions
+)
+
+add_fp_unittest(
+ nextupf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ nextupf16_test.cpp
+ HDRS
+ NextUpTest.h
+ DEPENDS
+ libc.src.math.nextupf16
)
add_fp_unittest(
@@ -2734,7 +2954,6 @@ add_fp_unittest(
NextUpTest.h
DEPENDS
libc.src.math.nextupf128
- libc.src.__support.FPUtil.manipulation_functions
)
# TODO(lntue): The current implementation of fputil::general::fma<float> is only
diff --git a/libc/test/src/math/smoke/FMaxTest.h b/libc/test/src/math/smoke/FMaxTest.h
index df8e35e0bd16..f4c78b5d04b5 100644
--- a/libc/test/src/math/smoke/FMaxTest.h
+++ b/libc/test/src/math/smoke/FMaxTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
@@ -55,10 +56,11 @@ public:
}
void testRange(FMaxFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
diff --git a/libc/test/src/math/smoke/FMaximumMagNumTest.h b/libc/test/src/math/smoke/FMaximumMagNumTest.h
index aafb6d2b0d5e..726f87059fc6 100644
--- a/libc/test/src/math/smoke/FMaximumMagNumTest.h
+++ b/libc/test/src/math/smoke/FMaximumMagNumTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUMMAG_NUMTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUMMAG_NUMTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/FPUtil/BasicOperations.h"
#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FEnvSafeTest.h"
@@ -68,10 +69,11 @@ public:
}
void testRange(FMaximumMagNumFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -82,11 +84,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (LIBC_NAMESPACE::fputil::abs(x) > LIBC_NAMESPACE::fputil::abs(y)) {
+ if (LIBC_NAMESPACE::fputil::abs(x) > LIBC_NAMESPACE::fputil::abs(y))
EXPECT_FP_EQ(x, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(y, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMaximumMagTest.h b/libc/test/src/math/smoke/FMaximumMagTest.h
index 7bb79a69be58..b5b2c1ca79ab 100644
--- a/libc/test/src/math/smoke/FMaximumMagTest.h
+++ b/libc/test/src/math/smoke/FMaximumMagTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUM_MAGTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUM_MAGTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/FPUtil/BasicOperations.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
@@ -56,10 +57,11 @@ public:
}
void testRange(FMaximumMagFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -70,11 +72,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (LIBC_NAMESPACE::fputil::abs(x) > LIBC_NAMESPACE::fputil::abs(y)) {
+ if (LIBC_NAMESPACE::fputil::abs(x) > LIBC_NAMESPACE::fputil::abs(y))
EXPECT_FP_EQ(x, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(y, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMaximumNumTest.h b/libc/test/src/math/smoke/FMaximumNumTest.h
index da0ea2c247a9..ec7913509d39 100644
--- a/libc/test/src/math/smoke/FMaximumNumTest.h
+++ b/libc/test/src/math/smoke/FMaximumNumTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUMNUMTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUMNUMTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
@@ -67,10 +68,11 @@ public:
}
void testRange(FMaximumNumFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -81,11 +83,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (x > y) {
+ if (x > y)
EXPECT_FP_EQ(x, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(y, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMaximumTest.h b/libc/test/src/math/smoke/FMaximumTest.h
index 1bd15163ed75..94e4a343190a 100644
--- a/libc/test/src/math/smoke/FMaximumTest.h
+++ b/libc/test/src/math/smoke/FMaximumTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUMTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMAXIMUMTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
@@ -55,10 +56,11 @@ public:
}
void testRange(FMaximumFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -69,11 +71,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (x > y) {
+ if (x > y)
EXPECT_FP_EQ(x, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(y, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMinTest.h b/libc/test/src/math/smoke/FMinTest.h
index f71b558cd3da..629aaab729a8 100644
--- a/libc/test/src/math/smoke/FMinTest.h
+++ b/libc/test/src/math/smoke/FMinTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
@@ -55,10 +56,11 @@ public:
}
void testRange(FMinFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
diff --git a/libc/test/src/math/smoke/FMinimumMagNumTest.h b/libc/test/src/math/smoke/FMinimumMagNumTest.h
index e4b8fd9e3353..2ceca6ff95ba 100644
--- a/libc/test/src/math/smoke/FMinimumMagNumTest.h
+++ b/libc/test/src/math/smoke/FMinimumMagNumTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUMMAG_NUMTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUMMAG_NUMTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/FPUtil/BasicOperations.h"
#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FEnvSafeTest.h"
@@ -68,10 +69,11 @@ public:
}
void testRange(FMinimumMagNumFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -82,11 +84,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (LIBC_NAMESPACE::fputil::abs(x) > LIBC_NAMESPACE::fputil::abs(y)) {
+ if (LIBC_NAMESPACE::fputil::abs(x) > LIBC_NAMESPACE::fputil::abs(y))
EXPECT_FP_EQ(y, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(x, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMinimumMagTest.h b/libc/test/src/math/smoke/FMinimumMagTest.h
index 3e16622fe3fa..9c49446795ce 100644
--- a/libc/test/src/math/smoke/FMinimumMagTest.h
+++ b/libc/test/src/math/smoke/FMinimumMagTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUM_MAGTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUM_MAGTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/FPUtil/BasicOperations.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
@@ -56,10 +57,11 @@ public:
}
void testRange(FMinimumMagFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -70,11 +72,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (LIBC_NAMESPACE::fputil::abs(x) < LIBC_NAMESPACE::fputil::abs(y)) {
+ if (LIBC_NAMESPACE::fputil::abs(x) < LIBC_NAMESPACE::fputil::abs(y))
EXPECT_FP_EQ(x, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(y, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMinimumNumTest.h b/libc/test/src/math/smoke/FMinimumNumTest.h
index 6186ea0df17c..8004ee987454 100644
--- a/libc/test/src/math/smoke/FMinimumNumTest.h
+++ b/libc/test/src/math/smoke/FMinimumNumTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUMNUMTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUMNUMTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
@@ -67,10 +68,11 @@ public:
}
void testRange(FMinimumNumFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -81,11 +83,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (x > y) {
+ if (x > y)
EXPECT_FP_EQ(y, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(x, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/FMinimumTest.h b/libc/test/src/math/smoke/FMinimumTest.h
index a267f6c78321..242c857fbb99 100644
--- a/libc/test/src/math/smoke/FMinimumTest.h
+++ b/libc/test/src/math/smoke/FMinimumTest.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUMTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_FMINIMUMTEST_H
+#include "src/__support/CPP/algorithm.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
@@ -55,10 +56,11 @@ public:
}
void testRange(FMinimumFunc func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = STORAGE_MAX / COUNT;
- for (StorageType i = 0, v = 0, w = STORAGE_MAX; i <= COUNT;
- ++i, v += STEP, w -= STEP) {
+ constexpr int COUNT = 100'001;
+ constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max(
+ static_cast<StorageType>(STORAGE_MAX / COUNT), StorageType(1));
+ StorageType v = 0, w = STORAGE_MAX;
+ for (int i = 0; i <= COUNT; ++i, v += STEP, w -= STEP) {
FPBits xbits(v), ybits(w);
if (xbits.is_inf_or_nan())
continue;
@@ -69,11 +71,10 @@ public:
if ((x == 0) && (y == 0))
continue;
- if (x > y) {
+ if (x > y)
EXPECT_FP_EQ(y, func(x, y));
- } else {
+ else
EXPECT_FP_EQ(x, func(x, y));
- }
}
}
};
diff --git a/libc/test/src/math/smoke/NextAfterTest.h b/libc/test/src/math/smoke/NextAfterTest.h
index d65ccdf8e70c..6278f899d8a8 100644
--- a/libc/test/src/math/smoke/NextAfterTest.h
+++ b/libc/test/src/math/smoke/NextAfterTest.h
@@ -9,15 +9,15 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_NEXTAFTERTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_NEXTAFTERTEST_H
-#include "hdr/math_macros.h"
#include "src/__support/CPP/bit.h"
-#include "src/__support/CPP/type_traits.h"
-#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
+#include "hdr/fenv_macros.h"
+
// TODO: Strengthen errno,exception checks and remove these assert macros
// after new matchers/test fixtures are added
#define ASSERT_FP_EQ_WITH_EXCEPTION(result, expected, expected_exception) \
@@ -181,7 +181,7 @@ public:
result_bits = FPBits(result);
ASSERT_EQ(result_bits.get_biased_exponent(), x_bits.get_biased_exponent());
ASSERT_EQ(result_bits.get_mantissa(),
- x_bits.get_mantissa() + StorageType(1));
+ static_cast<StorageType>(x_bits.get_mantissa() + StorageType(1)));
x = -x;
@@ -195,7 +195,7 @@ public:
result_bits = FPBits(result);
ASSERT_EQ(result_bits.get_biased_exponent(), x_bits.get_biased_exponent());
ASSERT_EQ(result_bits.get_mantissa(),
- x_bits.get_mantissa() + StorageType(1));
+ static_cast<StorageType>(x_bits.get_mantissa() + StorageType(1)));
}
};
diff --git a/libc/test/src/math/smoke/NextTowardTest.h b/libc/test/src/math/smoke/NextTowardTest.h
index a24ec9ff6bd8..5992273d9190 100644
--- a/libc/test/src/math/smoke/NextTowardTest.h
+++ b/libc/test/src/math/smoke/NextTowardTest.h
@@ -9,16 +9,15 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_NEXTTOWARDTEST_H
#define LLVM_LIBC_TEST_SRC_MATH_NEXTTOWARDTEST_H
-#include "hdr/fenv_macros.h"
-#include "hdr/math_macros.h"
#include "src/__support/CPP/bit.h"
-#include "src/__support/CPP/type_traits.h"
-#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
+#include "hdr/fenv_macros.h"
+
// TODO: Strengthen errno,exception checks and remove these assert macros
// after new matchers/test fixtures are added
#define ASSERT_FP_EQ_WITH_EXCEPTION(result, expected, expected_exception) \
@@ -194,7 +193,7 @@ public:
result_bits = FPBits(result);
ASSERT_EQ(result_bits.get_biased_exponent(), x_bits.get_biased_exponent());
ASSERT_EQ(result_bits.get_mantissa(),
- x_bits.get_mantissa() + StorageType(1));
+ static_cast<StorageType>(x_bits.get_mantissa() + StorageType(1)));
x = -x;
@@ -208,7 +207,7 @@ public:
result_bits = FPBits(result);
ASSERT_EQ(result_bits.get_biased_exponent(), x_bits.get_biased_exponent());
ASSERT_EQ(result_bits.get_mantissa(),
- x_bits.get_mantissa() + StorageType(1));
+ static_cast<StorageType>(x_bits.get_mantissa() + StorageType(1)));
}
};
diff --git a/libc/test/src/math/smoke/fmaximum_mag_numf16_test.cpp b/libc/test/src/math/smoke/fmaximum_mag_numf16_test.cpp
new file mode 100644
index 000000000000..b11653eb395e
--- /dev/null
+++ b/libc/test/src/math/smoke/fmaximum_mag_numf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fmaximum_mag_numf16 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMaximumMagNumTest.h"
+
+#include "src/math/fmaximum_mag_numf16.h"
+
+LIST_FMAXIMUM_MAG_NUM_TESTS(float16, LIBC_NAMESPACE::fmaximum_mag_numf16)
diff --git a/libc/test/src/math/smoke/fmaximum_magf16_test.cpp b/libc/test/src/math/smoke/fmaximum_magf16_test.cpp
new file mode 100644
index 000000000000..6df1e4aaf6a9
--- /dev/null
+++ b/libc/test/src/math/smoke/fmaximum_magf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fmaximum_magf16 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMaximumMagTest.h"
+
+#include "src/math/fmaximum_magf16.h"
+
+LIST_FMAXIMUM_MAG_TESTS(float16, LIBC_NAMESPACE::fmaximum_magf16)
diff --git a/libc/test/src/math/smoke/fmaximum_numf16_test.cpp b/libc/test/src/math/smoke/fmaximum_numf16_test.cpp
new file mode 100644
index 000000000000..7cb9cb068cf7
--- /dev/null
+++ b/libc/test/src/math/smoke/fmaximum_numf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fmaximum_numf16 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMaximumNumTest.h"
+
+#include "src/math/fmaximum_numf16.h"
+
+LIST_FMAXIMUM_NUM_TESTS(float16, LIBC_NAMESPACE::fmaximum_numf16)
diff --git a/libc/test/src/math/smoke/fmaximumf16_test.cpp b/libc/test/src/math/smoke/fmaximumf16_test.cpp
new file mode 100644
index 000000000000..4cbf846a1610
--- /dev/null
+++ b/libc/test/src/math/smoke/fmaximumf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fmaximumf16 -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMaximumTest.h"
+
+#include "src/math/fmaximumf16.h"
+
+LIST_FMAXIMUM_TESTS(float16, LIBC_NAMESPACE::fmaximumf16)
diff --git a/libc/test/src/math/smoke/fminimum_mag_numf16_test.cpp b/libc/test/src/math/smoke/fminimum_mag_numf16_test.cpp
new file mode 100644
index 000000000000..2c6aede529cd
--- /dev/null
+++ b/libc/test/src/math/smoke/fminimum_mag_numf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fminimum_mag_numf16 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMinimumMagNumTest.h"
+
+#include "src/math/fminimum_mag_numf16.h"
+
+LIST_FMINIMUM_MAG_NUM_TESTS(float16, LIBC_NAMESPACE::fminimum_mag_numf16)
diff --git a/libc/test/src/math/smoke/fminimum_magf16_test.cpp b/libc/test/src/math/smoke/fminimum_magf16_test.cpp
new file mode 100644
index 000000000000..3687aecedfd9
--- /dev/null
+++ b/libc/test/src/math/smoke/fminimum_magf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fminimum_magf16 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMinimumMagTest.h"
+
+#include "src/math/fminimum_magf16.h"
+
+LIST_FMINIMUM_MAG_TESTS(float16, LIBC_NAMESPACE::fminimum_magf16)
diff --git a/libc/test/src/math/smoke/fminimum_numf16_test.cpp b/libc/test/src/math/smoke/fminimum_numf16_test.cpp
new file mode 100644
index 000000000000..67750816d229
--- /dev/null
+++ b/libc/test/src/math/smoke/fminimum_numf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fminimum_numf16 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMinimumNumTest.h"
+
+#include "src/math/fminimum_numf16.h"
+
+LIST_FMINIMUM_NUM_TESTS(float16, LIBC_NAMESPACE::fminimum_numf16)
diff --git a/libc/test/src/math/smoke/fminimumf16_test.cpp b/libc/test/src/math/smoke/fminimumf16_test.cpp
new file mode 100644
index 000000000000..f8b0577490b9
--- /dev/null
+++ b/libc/test/src/math/smoke/fminimumf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for fminimumf16 -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FMinimumTest.h"
+
+#include "src/math/fminimumf16.h"
+
+LIST_FMINIMUM_TESTS(float16, LIBC_NAMESPACE::fminimumf16)
diff --git a/libc/test/src/math/smoke/nextafterf16_test.cpp b/libc/test/src/math/smoke/nextafterf16_test.cpp
new file mode 100644
index 000000000000..860a0c74acbc
--- /dev/null
+++ b/libc/test/src/math/smoke/nextafterf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nextafterf16 ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextAfterTest.h"
+
+#include "src/math/nextafterf16.h"
+
+LIST_NEXTAFTER_TESTS(float16, LIBC_NAMESPACE::nextafterf16)
diff --git a/libc/test/src/math/smoke/nextdownf16_test.cpp b/libc/test/src/math/smoke/nextdownf16_test.cpp
new file mode 100644
index 000000000000..353f08586177
--- /dev/null
+++ b/libc/test/src/math/smoke/nextdownf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nextdownf16 -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextDownTest.h"
+
+#include "src/math/nextdownf16.h"
+
+LIST_NEXTDOWN_TESTS(float16, LIBC_NAMESPACE::nextdownf16)
diff --git a/libc/test/src/math/smoke/nexttowardf16_test.cpp b/libc/test/src/math/smoke/nexttowardf16_test.cpp
new file mode 100644
index 000000000000..8490e8de94ce
--- /dev/null
+++ b/libc/test/src/math/smoke/nexttowardf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nexttowardf16 ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextTowardTest.h"
+
+#include "src/math/nexttowardf16.h"
+
+LIST_NEXTTOWARD_TESTS(float16, LIBC_NAMESPACE::nexttowardf16)
diff --git a/libc/test/src/math/smoke/nextupf16_test.cpp b/libc/test/src/math/smoke/nextupf16_test.cpp
new file mode 100644
index 000000000000..a146d279f3a7
--- /dev/null
+++ b/libc/test/src/math/smoke/nextupf16_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nextupf16 -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NextUpTest.h"
+
+#include "src/math/nextupf16.h"
+
+LIST_NEXTUP_TESTS(float16, LIBC_NAMESPACE::nextupf16)
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index 6a7faedece38..38488778c657 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -354,7 +354,20 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.stdlib.exit
libc.src.stdlib.atexit
libc.src.__support.CPP.array
- libc.src.__support.CPP.utility
+ )
+
+ add_libc_test(
+ at_quick_exit_test
+ # The EXPECT_EXITS test is only available for unit tests.
+ UNIT_TEST_ONLY
+ SUITE
+ libc-stdlib-tests
+ SRCS
+ at_quick_exit_test.cpp
+ DEPENDS
+ libc.src.stdlib.quick_exit
+ libc.src.stdlib.at_quick_exit
+ libc.src.__support.CPP.array
)
add_libc_test(
diff --git a/libc/test/src/stdlib/at_quick_exit_test.cpp b/libc/test/src/stdlib/at_quick_exit_test.cpp
new file mode 100644
index 000000000000..e0a258d9fb2d
--- /dev/null
+++ b/libc/test/src/stdlib/at_quick_exit_test.cpp
@@ -0,0 +1,90 @@
+//===-- Unittests for at_quick_exit ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/array.h"
+#include "src/__support/CPP/utility.h"
+#include "src/stdlib/at_quick_exit.h"
+#include "src/stdlib/quick_exit.h"
+#include "test/UnitTest/Test.h"
+
+static int a;
+TEST(LlvmLibcAtQuickExit, Basic) {
+ // In case tests ever run multiple times.
+ a = 0;
+
+ auto test = [] {
+ int status = LIBC_NAMESPACE::at_quick_exit(+[] {
+ if (a != 1)
+ __builtin_trap();
+ });
+ status |= LIBC_NAMESPACE::at_quick_exit(+[] { a++; });
+ if (status)
+ __builtin_trap();
+
+ LIBC_NAMESPACE::quick_exit(0);
+ };
+ EXPECT_EXITS(test, 0);
+}
+
+TEST(LlvmLibcAtQuickExit, AtQuickExitCallsSysExit) {
+ auto test = [] {
+ LIBC_NAMESPACE::at_quick_exit(+[] { _Exit(1); });
+ LIBC_NAMESPACE::quick_exit(0);
+ };
+ EXPECT_EXITS(test, 1);
+}
+
+static int size;
+static LIBC_NAMESPACE::cpp::array<int, 256> arr;
+
+template <int... Ts>
+void register_at_quick_exit_handlers(
+ LIBC_NAMESPACE::cpp::integer_sequence<int, Ts...>) {
+ (LIBC_NAMESPACE::at_quick_exit(+[] { arr[size++] = Ts; }), ...);
+}
+
+template <int count> constexpr auto get_test() {
+ return [] {
+ LIBC_NAMESPACE::at_quick_exit(+[] {
+ if (size != count)
+ __builtin_trap();
+ for (int i = 0; i < count; i++)
+ if (arr[i] != count - 1 - i)
+ __builtin_trap();
+ });
+ register_at_quick_exit_handlers(
+ LIBC_NAMESPACE::cpp::make_integer_sequence<int, count>{});
+ LIBC_NAMESPACE::quick_exit(0);
+ };
+}
+
+TEST(LlvmLibcAtQuickExit, ReverseOrder) {
+ // In case tests ever run multiple times.
+ size = 0;
+
+ auto test = get_test<32>();
+ EXPECT_EXITS(test, 0);
+}
+
+TEST(LlvmLibcAtQuickExit, Many) {
+ // In case tests ever run multiple times.
+ size = 0;
+
+ auto test = get_test<256>();
+ EXPECT_EXITS(test, 0);
+}
+
+TEST(LlvmLibcAtQuickExit, HandlerCallsAtQuickExit) {
+ auto test = [] {
+ LIBC_NAMESPACE::at_quick_exit(+[] {
+ LIBC_NAMESPACE::at_quick_exit(+[] { LIBC_NAMESPACE::quick_exit(1); });
+ });
+ LIBC_NAMESPACE::quick_exit(0);
+ };
+ EXPECT_EXITS(test, 1);
+}
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 5ce179524308..9858ae905983 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -198,7 +198,7 @@ set( tahiti_aliases pitcairn verde oland hainan bonaire kabini kaveri hawaii
gfx1010 gfx1011 gfx1012 gfx1013
gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036
gfx1100 gfx1101 gfx1102 gfx1103
- gfx1150 gfx1151
+ gfx1150 gfx1151 gfx1152
gfx1200 gfx1201
)
diff --git a/libcxx/docs/Hardening.rst b/libcxx/docs/Hardening.rst
index 0761f42368e9..996e7aed881a 100644
--- a/libcxx/docs/Hardening.rst
+++ b/libcxx/docs/Hardening.rst
@@ -1,4 +1,4 @@
-.. _hardening-modes:
+.. _hardening:
===============
Hardening Modes
@@ -29,8 +29,11 @@ modes are:
rigour impacts performance more than fast mode: we recommend benchmarking to
determine if that is acceptable for your program.
- **Debug mode**, which enables all the available checks in the library,
- including internal assertions, some of which might be very expensive. This
- mode is intended to be used for testing, not in production.
+ including heuristic checks that might have significant performance overhead as
+ well as internal library assertions. This mode should be used in
+ non-production environments (such as test suites, CI, or local development).
+ We don’t commit to a particular level of performance in this mode and it’s
+ *not* intended to be used in production.
.. note::
@@ -72,17 +75,367 @@ to control the level by passing **one** of the following options to the compiler
Notes for vendors
-----------------
-Vendors can set the default hardening mode by providing ``LIBCXX_HARDENING_MODE``
-as a configuration option, with the possible values of ``none``, ``fast``,
-``extensive`` and ``debug``. The default value is ``none`` which doesn't enable
-any hardening checks (this mode is sometimes called the ``unchecked`` mode).
+Vendors can set the default hardening mode by providing
+``LIBCXX_HARDENING_MODE`` as a configuration option, with the possible values of
+``none``, ``fast``, ``extensive`` and ``debug``. The default value is ``none``
+which doesn't enable any hardening checks (this mode is sometimes called the
+``unchecked`` mode).
This option controls both the hardening mode that the precompiled library is
built with and the default hardening mode that users will build with. If set to
``none``, the precompiled library will not contain any assertions, and user code
will default to building without assertions.
-Iterator bounds checking
-------------------------
+Vendors can also override the way the program is terminated when an assertion
+fails by :ref:`providing a custom header <override-assertion-handler>`.
-TODO(hardening)
+Assertion categories
+====================
+
+Inside the library, individual assertions are grouped into different
+*categories*. Each hardening mode enables a different set of assertion
+categories; categories provide an additional layer of abstraction that makes it
+easier to reason about the high-level semantics of a hardening mode.
+
+.. note::
+
+ Users are not intended to interact with these categories directly -- the
+ categories are considered internal to the library and subject to change.
+
+- ``valid-element-access`` -- checks that any attempts to access a container
+ element, whether through the container object or through an iterator, are
+ valid and do not attempt to go out of bounds or otherwise access
+ a non-existent element. This also includes operations that set up an imminent
+ invalid access (e.g. incrementing an end iterator). For iterator checks to
+ work, bounded iterators must be enabled in the ABI. Types like
+ ``std::optional`` and ``std::function`` are considered containers (with at
+ most one element) for the purposes of this check.
+
+- ``valid-input-range`` -- checks that ranges (whether expressed as an iterator
+ pair, an iterator and a sentinel, an iterator and a count, or
+ a ``std::range``) given as input to library functions are valid:
+ - the sentinel is reachable from the begin iterator;
+ - TODO(hardening): both iterators refer to the same container.
+
+ ("input" here refers to "an input given to an algorithm", not to an iterator
+ category)
+
+ Violating assertions in this category leads to an out-of-bounds access.
+
+- ``non-null`` -- checks that the pointer being dereferenced is not null. On
+ most modern platforms, the zero address does not refer to an actual location
+ in memory, so a null pointer dereference would not compromise the memory
+ security of a program (however, it is still undefined behavior that can result
+ in strange errors due to compiler optimizations).
+
+- ``non-overlapping-ranges`` -- for functions that take several ranges as
+ arguments, checks that those ranges do not overlap.
+
+- ``valid-deallocation`` -- checks that an attempt to deallocate memory is valid
+ (e.g. the given object was allocated by the given allocator). Violating this
+ category typically results in a memory leak.
+
+- ``valid-external-api-call`` -- checks that a call to an external API doesn't
+ fail in an unexpected manner. This includes triggering documented cases of
+ undefined behavior in an external library (like attempting to unlock an
+ unlocked mutex in pthreads). Any API external to the library falls under this
+ category (from system calls to compiler intrinsics). We generally don't expect
+ these failures to compromise memory safety or otherwise create an immediate
+ security issue.
+
+- ``compatible-allocator`` -- checks any operations that exchange nodes between
+ containers to make sure the containers have compatible allocators.
+
+- ``argument-within-domain`` -- checks that the given argument is within the
+ domain of valid arguments for the function. Violating this typically produces
+ an incorrect result (e.g. ``std::clamp`` returns the original value without
+ clamping it due to incorrect functors) or puts an object into an invalid state
+ (e.g. a string view where only a subset of elements is accessible). This
+ category covers assertions whose violation doesn't cause any immediate issues
+ inside the library itself -- whatever the consequences are, they will happen
+ in the user code.
+
+- ``pedantic`` -- checks preconditions that are imposed by the Standard but
+ whose violation happens to be benign in libc++.
+
+- ``semantic-requirement`` -- checks that the given argument satisfies the
+ semantic requirements imposed by the Standard. Typically, there is no simple
+ way to completely prove that a semantic requirement is satisfied; thus, this
+ would often be a heuristic check and it might be quite expensive.
+
+- ``internal`` -- checks that internal invariants of the library hold. These
+ assertions don't depend on user input.
+
+- ``uncategorized`` -- for assertions that haven't been properly classified yet.
+ This category is an escape hatch used for some existing assertions in the
+ library; all new code should have its assertions properly classified.
+
+Mapping between the hardening modes and the assertion categories
+================================================================
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Category name
+ - ``fast``
+ - ``extensive``
+ - ``debug``
+ * - ``valid-element-access``
+ - ✅
+ - ✅
+ - ✅
+ * - ``valid-input-range``
+ - ✅
+ - ✅
+ - ✅
+ * - ``non-null``
+ - ❌
+ - ✅
+ - ✅
+ * - ``non-overlapping-ranges``
+ - ❌
+ - ✅
+ - ✅
+ * - ``valid-deallocation``
+ - ❌
+ - ✅
+ - ✅
+ * - ``valid-external-api-call``
+ - ❌
+ - ✅
+ - ✅
+ * - ``compatible-allocator``
+ - ❌
+ - ✅
+ - ✅
+ * - ``argument-within-domain``
+ - ❌
+ - ✅
+ - ✅
+ * - ``pedantic``
+ - ❌
+ - ✅
+ - ✅
+ * - ``semantic-requirement``
+ - ❌
+ - ❌
+ - ✅
+ * - ``internal``
+ - ❌
+ - ❌
+ - ✅
+ * - ``uncategorized``
+ - ❌
+ - ✅
+ - ✅
+
+.. note::
+
+ At the moment, each subsequent hardening mode is a strict superset of the
+ previous one (in other words, each subsequent mode only enables additional
+ assertion categories without disabling any), but this won't necessarily be
+ true for any hardening modes that might be added in the future.
+
+.. note::
+
+ The categories enabled by each mode are subject to change and users should not
+ rely on the precise assertions enabled by a mode at a given point in time.
+ However, the library does guarantee to keep the hardening modes stable and
+ to fulfill the semantics documented here.
+
+Hardening assertion failure
+===========================
+
+In production modes (``fast`` and ``extensive``), a hardening assertion failure
+immediately `traps <https://llvm.org/docs/LangRef.html#llvm-trap-intrinsic>`_
+the program. This is the safest approach that also minimizes the code size
+penalty as the failure handler maps to a single instruction. The downside is
+that the failure provides no additional details other than the stack trace
+(which might also be affected by optimizations).
+
+TODO(hardening): describe ``__builtin_verbose_trap`` once we can use it.
+
+In the ``debug`` mode, an assertion failure terminates the program in an
+unspecified manner and also outputs the associated error message to the error
+output. This is less secure and increases the size of the binary (among other
+things, it has to store the error message strings) but makes the failure easier
+to debug. It also allows testing the error messages in our test suite.
+
+.. _override-assertion-handler:
+
+Overriding the assertion failure handler
+----------------------------------------
+
+Vendors can override the default assertion handler mechanism by following these
+steps:
+
+- create a header file that provides a definition of a macro called
+ ``_LIBCPP_ASSERTION_HANDLER``. The macro will be invoked when a hardening
+ assertion fails, with a single parameter containing a null-terminated string
+ with the error message.
+- when configuring the library, provide the path to custom header (relative to
+ the root of the repository) via the CMake variable
+ ``LIBCXX_ASSERTION_HANDLER_FILE``.
+
+Note that almost all libc++ headers include the assertion handler header which
+means it should not include anything non-trivial from the standard library to
+avoid creating circular dependencies.
+
+There is no existing mechanism for users to override the assertion handler
+because the ability to do the override other than at configure-time carries an
+unavoidable code size penalty that would otherwise be imposed on all users,
+whether they require such customization or not. Instead, we let vendors decide
+what's right on their platform for their users -- a vendor who wishes to provide
+this capability is free to do so, e.g. by declaring the assertion handler as an
+overridable function.
+
+ABI
+===
+
+Setting a hardening mode does **not** affect the ABI. Each mode uses the subset
+of checks available in the current ABI configuration which is determined by the
+platform.
+
+It is important to stress that whether a particular check is enabled depends on
+the combination of the selected hardening mode and the hardening-related ABI
+options. Some checks require changing the ABI from the "default" to store
+additional information in the library classes -- e.g. checking whether an
+iterator is valid upon dereference generally requires storing data about bounds
+inside the iterator object. Using ``std::span`` as an example, setting the
+hardening mode to ``fast`` will always enable the ``valid-element-access``
+checks when accessing elements via a ``std::span`` object, but whether
+dereferencing a ``std::span`` iterator does the equivalent check depends on the
+ABI configuration.
+
+ABI options
+-----------
+
+Vendors can use the following ABI options to enable additional hardening checks:
+
+- ``_LIBCPP_ABI_BOUNDED_ITERATORS`` -- changes the iterator type of select
+ containers (see below) to a bounded iterator that keeps track of whether it's
+ within the bounds of the original container and asserts valid bounds on every
+ dereference.
+
+ ABI impact: changes the iterator type of the relevant containers.
+
+ Supported containers:
+
+ - ``span``;
+ - ``string_view``.
+
+ABI tags
+--------
+
+We use ABI tags to allow translation units built with different hardening modes
+to interact with each other without causing ODR violations. Knowing how
+hardening modes are encoded into the ABI tags might be useful to examine
+a binary and determine whether it was built with hardening enabled.
+
+.. warning::
+ We don't commit to the encoding scheme used by the ABI tags being stable
+ between different releases of libc++. The tags themselves are never stable, by
+ design -- new releases increase the version number. The following describes
+ the state of the latest release and is for informational purposes only.
+
+The first character of an ABI tag encodes the hardening mode:
+
+- ``f`` -- [f]ast mode;
+- ``s`` -- extensive ("[s]afe") mode;
+- ``d`` -- [d]ebug mode;
+- ``n`` -- [n]one mode.
+
+Hardened containers status
+==========================
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Name
+ - Member functions
+ - Iterators (ABI-dependent)
+ * - ``span``
+ - ✅
+ - ✅
+ * - ``string_view``
+ - ✅
+ - ✅
+ * - ``array``
+ - ✅
+ - ❌
+ * - ``vector``
+ - ✅
+ - ❌
+ * - ``string``
+ - ✅
+ - ❌
+ * - ``list``
+ - ✅
+ - ❌
+ * - ``forward_list``
+ - ❌
+ - ❌
+ * - ``deque``
+ - ✅
+ - ❌
+ * - ``map``
+ - ❌
+ - ❌
+ * - ``set``
+ - ❌
+ - ❌
+ * - ``multimap``
+ - ❌
+ - ❌
+ * - ``multiset``
+ - ❌
+ - ❌
+ * - ``unordered_map``
+ - Partial
+ - Partial
+ * - ``unordered_set``
+ - Partial
+ - Partial
+ * - ``unordered_multimap``
+ - Partial
+ - Partial
+ * - ``unordered_multiset``
+ - Partial
+ - Partial
+ * - ``mdspan``
+ - ✅
+ - ❌
+ * - ``optional``
+ - ✅
+ - N/A
+ * - ``function``
+ - ❌
+ - N/A
+ * - ``variant``
+ - N/A
+ - N/A
+ * - ``any``
+ - N/A
+ - N/A
+ * - ``expected``
+ - ✅
+ - N/A
+ * - ``valarray``
+ - Partial
+ - N/A
+ * - ``bitset``
+ - ❌
+ - N/A
+
+Testing
+=======
+
+Please see :ref:`Testing documentation <testing-hardening-assertions>`.
+
+Further reading
+===============
+
+- `Hardening RFC <https://discourse.llvm.org/t/rfc-hardening-in-libc/73925>`_:
+ contains some of the design rationale.
diff --git a/libcxx/docs/ReleaseNotes/18.rst b/libcxx/docs/ReleaseNotes/18.rst
index fcd630e09b44..4f7b9b362e5e 100644
--- a/libcxx/docs/ReleaseNotes/18.rst
+++ b/libcxx/docs/ReleaseNotes/18.rst
@@ -40,7 +40,7 @@ and C++26 features.
New hardened modes for the library have been added, replacing the legacy debug mode that was
removed in the LLVM 17 release. Unlike the legacy debug mode, some of these hardening modes are
-also intended to be used in production. See :ref:`hardening-modes` for more details.
+also intended to be used in production. See :ref:`hardening` for more details.
Work on the ranges support has progressed. See
:ref:`ranges-status` for the current status.
diff --git a/libcxx/docs/TestingLibcxx.rst b/libcxx/docs/TestingLibcxx.rst
index 50ee9d4ee400..d9f4fe467fe3 100644
--- a/libcxx/docs/TestingLibcxx.rst
+++ b/libcxx/docs/TestingLibcxx.rst
@@ -480,3 +480,48 @@ For example:
$ ./algorithms.libcxx.out --benchmark_filter=BM_Sort.* # Only runs the sort benchmarks
For more information about running benchmarks see `Google Benchmark`_.
+
+
+.. _testing-hardening-assertions:
+
+Testing hardening assertions
+============================
+
+Each hardening assertion should be tested using death tests (via the
+``TEST_LIBCPP_ASSERT_FAILURE`` macro). Use the ``libcpp-hardening-mode`` Lit
+feature to make sure the assertion is enabled in (and only in) the intended
+modes. The convention is to use `assert.` in the name of the test file to make
+it easier to identify as a hardening test, e.g. ``assert.my_func.pass.cpp``.
+A toy example:
+
+.. code-block:: cpp
+
+ // Note: the following three annotations are currently needed to use the
+ // `TEST_LIBCPP_ASSERT_FAILURE`.
+ // REQUIRES: has-unix-headers
+ // UNSUPPORTED: c++03
+ // XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing
+
+ // Example: only run this test in `fast`/`extensive`/`debug` modes.
+ // UNSUPPORTED: libcpp-hardening-mode=none
+ // Example: only run this test in the `debug` mode.
+ // REQUIRES: libcpp-hardening-mode=debug
+ // Example: only run this test in `extensive`/`debug` modes.
+ // REQUIRES: libcpp-hardening-mode={{extensive|debug}}
+
+ #include <header_being_tested>
+
+ #include "check_assertion.h" // Contains the `TEST_LIBCPP_ASSERT_FAILURE` macro
+
+ int main(int, char**) {
+ std::type_being_tested foo;
+ int bad_input = -1;
+ TEST_LIBCPP_ASSERT_FAILURE(foo.some_function_that_asserts(bad_input),
+ "The expected assertion message");
+
+ return 0;
+ }
+
+Note that error messages are only tested (matched) if the ``debug``
+hardening mode is used.
+
diff --git a/libcxx/include/__configuration/abi.h b/libcxx/include/__configuration/abi.h
index 17aceb042f52..73375fa47d07 100644
--- a/libcxx/include/__configuration/abi.h
+++ b/libcxx/include/__configuration/abi.h
@@ -127,8 +127,7 @@
//
// Supported containers:
// - `span`;
-// - `string_view`;
-// - `array`.
+// - `string_view`.
// #define _LIBCPP_ABI_BOUNDED_ITERATORS
#if defined(_LIBCPP_COMPILER_CLANG_BASED)
diff --git a/libcxx/test/std/experimental/simd/simd.class/simd_copy.pass.cpp b/libcxx/test/std/experimental/simd/simd.class/simd_copy.pass.cpp
index 8fcc811f6df3..7d91ca0eada1 100644
--- a/libcxx/test/std/experimental/simd/simd.class/simd_copy.pass.cpp
+++ b/libcxx/test/std/experimental/simd/simd.class/simd_copy.pass.cpp
@@ -8,9 +8,9 @@
// UNSUPPORTED: c++03, c++11, c++14
-// FIXME: Fatal error with following targets (remove XFAIL when fixed):
+// Older versions of clang may encounter a backend error (see 0295c2ad):
// Pass-by-value arguments with alignment greater than register width are not supported.
-// XFAIL: target=powerpc{{.*}}-ibm-aix7.2.5.7
+// XFAIL: target=powerpc{{.*}}-ibm-{{.*}} && (clang-17 || clang-18)
// <experimental/simd>
//
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 0295a656b070..0aceb941a1dc 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -783,13 +783,11 @@ template <class ELFT> void Writer<ELFT>::addRelIpltSymbols() {
// __rela_iplt_{start,end} are initially defined relative to dummy section 0.
// We'll override Out::elfHeader with relaDyn later when we are sure that
// .rela.dyn will be present in the output.
- ElfSym::relaIpltStart = addOptionalRegular(
- config->isRela ? "__rela_iplt_start" : "__rel_iplt_start",
- Out::elfHeader, 0, STV_HIDDEN);
-
- ElfSym::relaIpltEnd = addOptionalRegular(
- config->isRela ? "__rela_iplt_end" : "__rel_iplt_end",
- Out::elfHeader, 0, STV_HIDDEN);
+ std::string name = config->isRela ? "__rela_iplt_start" : "__rel_iplt_start";
+ ElfSym::relaIpltStart =
+ addOptionalRegular(name, Out::elfHeader, 0, STV_HIDDEN);
+ name.replace(name.size() - 5, 5, "end");
+ ElfSym::relaIpltEnd = addOptionalRegular(name, Out::elfHeader, 0, STV_HIDDEN);
}
// This function generates assignments for predefined symbols (e.g. _end or
@@ -921,7 +919,11 @@ static bool shouldSkip(SectionCommand *cmd) {
static SmallVectorImpl<SectionCommand *>::iterator
findOrphanPos(SmallVectorImpl<SectionCommand *>::iterator b,
SmallVectorImpl<SectionCommand *>::iterator e) {
+ // Place non-alloc orphan sections at the end. This matches how we assign file
+ // offsets to non-alloc sections.
OutputSection *sec = &cast<OutputDesc>(*e)->osec;
+ if (!(sec->flags & SHF_ALLOC))
+ return e;
// As a special case, place .relro_padding before the SymbolAssignment using
// DATA_SEGMENT_RELRO_END, if present.
@@ -2483,11 +2485,12 @@ template <class ELFT> void Writer<ELFT>::assignFileOffsets() {
lastRX->lastSec == sec)
off = alignToPowerOf2(off, config->maxPageSize);
}
- for (OutputSection *osec : outputSections)
- if (!(osec->flags & SHF_ALLOC)) {
- osec->offset = alignToPowerOf2(off, osec->addralign);
- off = osec->offset + osec->size;
- }
+ for (OutputSection *osec : outputSections) {
+ if (osec->flags & SHF_ALLOC)
+ continue;
+ osec->offset = alignToPowerOf2(off, osec->addralign);
+ off = osec->offset + osec->size;
+ }
sectionHeaderOff = alignToPowerOf2(off, config->wordsize);
fileSize = sectionHeaderOff + (outputSections.size() + 1) * sizeof(Elf_Shdr);
diff --git a/lld/test/ELF/linkerscript/memory-nonalloc-no-warn.test b/lld/test/ELF/linkerscript/memory-nonalloc-no-warn.test
index 2dcd0f8d6ce2..eabdf75fcf93 100644
--- a/lld/test/ELF/linkerscript/memory-nonalloc-no-warn.test
+++ b/lld/test/ELF/linkerscript/memory-nonalloc-no-warn.test
@@ -16,20 +16,20 @@
## The output file must include all sections.
# RUN: llvm-readelf -S %t/a.elf | FileCheck %s
-# CHECK: There are 12 section headers, starting at offset 0x2140:
+# CHECK: There are 12 section headers, starting at offset 0x2138:
# CHECK: [Nr] Name Type Address Off Size ES Flg Lk Inf Al
# CHECK-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0
# CHECK-NEXT: [ 1] .nonalloc PROGBITS 0000000000000000 001064 001000 00 W 0 0 1
-# CHECK-NEXT: [ 2] .comment PROGBITS 0000000000000000 {{.*}} {{.*}} 01 MS 0 0 1
-# CHECK-NEXT: [ 3] .symtab SYMTAB 0000000000000000 {{.*}} {{.*}} 18 5 1 8
-# CHECK-NEXT: [ 4] .shstrtab STRTAB 0000000000000000 {{.*}} {{.*}} 00 0 0 1
-# CHECK-NEXT: [ 5] .strtab STRTAB 0000000000000000 {{.*}} {{.*}} 00 0 0 1
-# CHECK-NEXT: [ 6] .dat PROGBITS 0000000000000000 002137 000004 00 W 0 0 1
-# CHECK-NEXT: [ 7] .intvec0_out PROGBITS 0000000000000000 00213b 000000 00 W 0 0 1
-# CHECK-NEXT: [ 8] .intvec1_out PROGBITS 0000000000000000 00213b 000000 00 W 0 0 1
-# CHECK-NEXT: [ 9] .intvec2_out PROGBITS 0000000000000000 00213b 000000 00 W 0 0 1
-# CHECK-NEXT: [10] .intvec3_out PROGBITS 00000000803fe060 001060 000004 00 AX 0 0 1
-# CHECK-NEXT: [11] .text PROGBITS 00000000803fe064 001064 000000 00 AX 0 0 4
+# CHECK-NEXT: [ 2] .dat PROGBITS 0000000000000000 002064 000004 00 W 0 0 1
+# CHECK-NEXT: [ 3] .intvec0_out PROGBITS 0000000000000000 002068 000000 00 W 0 0 1
+# CHECK-NEXT: [ 4] .intvec1_out PROGBITS 0000000000000000 002068 000000 00 W 0 0 1
+# CHECK-NEXT: [ 5] .intvec2_out PROGBITS 0000000000000000 002068 000000 00 W 0 0 1
+# CHECK-NEXT: [ 6] .intvec3_out PROGBITS 00000000803fe060 001060 000004 00 AX 0 0 1
+# CHECK-NEXT: [ 7] .text PROGBITS 00000000803fe064 001064 000000 00 AX 0 0 4
+# CHECK-NEXT: [ 8] .comment PROGBITS 0000000000000000 {{.*}} {{.*}} 01 MS 0 0 1
+# CHECK-NEXT: [ 9] .symtab SYMTAB 0000000000000000 {{.*}} {{.*}} 18 11 1 8
+# CHECK-NEXT: [10] .shstrtab STRTAB 0000000000000000 {{.*}} {{.*}} 00 0 0 1
+# CHECK-NEXT: [11] .strtab STRTAB 0000000000000000 {{.*}} {{.*}} 00 0 0 1
#--- a.s
diff --git a/lld/test/ELF/linkerscript/sections-nonalloc.s b/lld/test/ELF/linkerscript/sections-nonalloc.s
index 79765d32dfff..d66e524248ce 100644
--- a/lld/test/ELF/linkerscript/sections-nonalloc.s
+++ b/lld/test/ELF/linkerscript/sections-nonalloc.s
@@ -16,15 +16,15 @@
# CHECK-NEXT: [ 2] data1 PROGBITS 0000000000000001 001001 000001 00 WA 0
# CHECK-NEXT: [ 3] other1 PROGBITS 0000000000000000 001008 000001 00 0
# CHECK-NEXT: [ 4] other2 PROGBITS 0000000000000000 001010 000001 00 0
-## Orphan placement places other3, .symtab, .shstrtab and .strtab after other2.
-# CHECK-NEXT: [ 5] other3 PROGBITS 0000000000000000 001020 000001 00 0
-# CHECK-NEXT: [ 6] .symtab SYMTAB 0000000000000000 001028 000030 18 8
-# CHECK-NEXT: [ 7] .shstrtab STRTAB 0000000000000000 001058 00004d 00 0
-# CHECK-NEXT: [ 8] .strtab STRTAB 0000000000000000 0010a5 000008 00 0
-# CHECK-NEXT: [ 9] data2 PROGBITS 0000000000000002 001002 000001 00 WA 0
+# CHECK-NEXT: [ 5] data2 PROGBITS 0000000000000002 001002 000001 00 WA 0
## max{sortRank(data1),sortRank(data2)} <= sortRank(data3). data3 is placed after the latter.
-# CHECK-NEXT: [10] data3 PROGBITS 0000000000000003 001003 000001 00 WA 0
-# CHECK-NEXT: [11] .text PROGBITS 0000000000000004 001004 000001 00 AX 0
+# CHECK-NEXT: [ 6] data3 PROGBITS 0000000000000003 001003 000001 00 WA 0
+# CHECK-NEXT: [ 7] .text PROGBITS 0000000000000004 001004 000001 00 AX 0
+## Non-alloc orphan sections other3, .symtab, .shstrtab and .strtab are placed at the end.
+# CHECK-NEXT: [ 8] other3 PROGBITS 0000000000000000 001020 000001 00 0
+# CHECK-NEXT: [ 9] .symtab SYMTAB 0000000000000000 001028 000030 18 11
+# CHECK-NEXT: [10] .shstrtab STRTAB 0000000000000000 001058 00004d 00 0
+# CHECK-NEXT: [11] .strtab STRTAB 0000000000000000 0010a5 000008 00 0
# CHECK: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
# CHECK-NEXT: LOAD 0x001000 0x0000000000000000 0x0000000000000000 0x000004 0x000004 RW 0x1000
@@ -34,6 +34,11 @@
# RUN: ld.lld -T %t/b.lds %t.o -o %tb
# RUN: llvm-readelf -S -l %tb | FileCheck %s --check-prefix=CHECK1
+## --section-start causes the orphan other3 to be considered before .data3.
+## The non-alloc other3 does not disable the placement of .data3.
+# RUN: ld.lld -T %t/b.lds %t.o -o %tb --section-start=other3=0
+# RUN: llvm-readelf -S -l %tb | FileCheck %s --check-prefix=CHECK1
+
# CHECK1: [Nr] Name Type Address Off Size ES Flg Lk
# CHECK1-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0
# CHECK1-NEXT: [ 1] .text PROGBITS 00000000000000b0 0000b0 000001 00 AX 0
@@ -41,12 +46,12 @@
# CHECK1-NEXT: [ 3] data1 PROGBITS 00000000000000b2 0000b2 000001 00 WA 0
# CHECK1-NEXT: [ 4] other1 PROGBITS 0000000000000000 0000b8 000001 00 0
# CHECK1-NEXT: [ 5] other2 PROGBITS 0000000000000000 0000c0 000001 00 0
-# CHECK1-NEXT: [ 6] other3 PROGBITS 0000000000000000 0000d0 000001 00 0
-# CHECK1-NEXT: [ 7] .symtab SYMTAB 0000000000000000 0000d8 000030 18 9
-# CHECK1-NEXT: [ 8] .shstrtab STRTAB 0000000000000000 000108 00004d 00 0
-# CHECK1-NEXT: [ 9] .strtab STRTAB 0000000000000000 000155 000008 00 0
-# CHECK1-NEXT: [10] data2 PROGBITS 00000000000000b3 0000b3 000001 00 WA 0
-# CHECK1-NEXT: [11] data3 PROGBITS 00000000000000b4 0000b4 000001 00 WA 0
+# CHECK1-NEXT: [ 6] data2 PROGBITS 00000000000000b3 0000b3 000001 00 WA 0
+# CHECK1-NEXT: [ 7] data3 PROGBITS 00000000000000b4 0000b4 000001 00 WA 0
+# CHECK1-NEXT: [ 8] other3 PROGBITS 0000000000000000 0000d0 000001 00 0
+# CHECK1-NEXT: [ 9] .symtab SYMTAB 0000000000000000 0000d8 000030 18 11
+# CHECK1-NEXT: [10] .shstrtab STRTAB 0000000000000000 000108 00004d 00 0
+# CHECK1-NEXT: [11] .strtab STRTAB 0000000000000000 000155 000008 00 0
# CHECK1: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
# CHECK1-NEXT: LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x0000b5 0x0000b5 RWE 0x1000
# CHECK1-NEXT: 0x60000000 0x0000b8 0x0000000000000000 0x0000000000000000 0x000009 0x000001 0x8
diff --git a/lld/test/ELF/linkerscript/sections.s b/lld/test/ELF/linkerscript/sections.s
index 5d6cc1f3bd0d..fc03af8402df 100644
--- a/lld/test/ELF/linkerscript/sections.s
+++ b/lld/test/ELF/linkerscript/sections.s
@@ -79,8 +79,8 @@
# SEP-BY-NONALLOC: [ 1] .text PROGBITS 0000000000000000 001000 00000e 00 AX
# SEP-BY-NONALLOC-NEXT: [ 2] .data PROGBITS 000000000000000e 00100e 000020 00 WA
# SEP-BY-NONALLOC-NEXT: [ 3] .comment PROGBITS 0000000000000000 001031 000008 01 MS
-# SEP-BY-NONALLOC: [ 7] other PROGBITS 000000000000002e 00102e 000003 00 WA
-# SEP-BY-NONALLOC-NEXT: [ 8] .bss NOBITS 0000000000000031 001031 000002 00 WA
+# SEP-BY-NONALLOC: [ 4] other PROGBITS 000000000000002e 00102e 000003 00 WA
+# SEP-BY-NONALLOC-NEXT: [ 5] .bss NOBITS 0000000000000031 001031 000002 00 WA
# SEP-BY-NONALLOC: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
# SEP-BY-NONALLOC-NEXT: LOAD 0x001000 0x0000000000000000 0x0000000000000000 0x00000e 0x00000e R E 0x1000
diff --git a/lld/test/ELF/relocatable-comdat.s b/lld/test/ELF/relocatable-comdat.s
index 160c48d9fb4d..45ca9fb7a248 100644
--- a/lld/test/ELF/relocatable-comdat.s
+++ b/lld/test/ELF/relocatable-comdat.s
@@ -3,46 +3,71 @@
## may be rewritten because group members may change their indices. Additionally,
## group member may be combined or discarded (e.g. /DISCARD/ or --gc-sections).
-# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
-# RUN: ld.lld -r %t.o %t.o -o %t.ro
-# RUN: llvm-readelf -g -S %t.ro | FileCheck %s
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o
+# RUN: ld.lld -r a.o a.o -o a.ro
+# RUN: llvm-readelf -g -S a.ro | FileCheck %s
-# CHECK: Name Type Address Off Size ES Flg Lk Inf Al
-# CHECK: .group GROUP 0000000000000000 {{.*}} 000014 04 {{[1-9]}} [[#]] 4
+# CHECK: Name Type Address Off Size ES Flg Lk Inf Al
+# CHECK: .group GROUP 0000000000000000 {{.*}} 00001c 04 [[#]] [[#]] 4
+# CHECK-NEXT: .rodata.bar PROGBITS 0000000000000000 {{.*}} 000001 00 AG 0 0 1
+# CHECK-NEXT: .rodata.foo PROGBITS 0000000000000000 {{.*}} 000001 00 AG 0 0 1
+# CHECK-NEXT: .text.bar PROGBITS 0000000000000000 {{.*}} 000008 00 AXG 0 0 1
+# CHECK-NEXT: .rela.text.bar RELA 0000000000000000 {{.*}} 000018 18 IG [[#]] [[#]] 8
+# CHECK-NEXT: .text.foo PROGBITS 0000000000000000 {{.*}} 000008 00 AXG [[#]] [[#]] 1
+# CHECK-NEXT: .rela.text.foo RELA 0000000000000000 {{.*}} 000018 18 IG [[#]] [[#]] 8
+# CHECK-NEXT: .note.GNU-stack
-# CHECK: COMDAT group section [{{.*}}] `.group' [abc] contains 4 sections:
+# CHECK: COMDAT group section [{{.*}}] `.group' [abc] contains 6 sections:
# CHECK-NEXT: Name
# CHECK-NEXT: .rodata.bar
# CHECK-NEXT: .rodata.foo
# CHECK-NEXT: .text.bar
+# CHECK-NEXT: .rela.text.bar
# CHECK-NEXT: .text.foo
+# CHECK-NEXT: .rela.text.foo
## Rewrite SHT_GROUP content if some members are combined.
-# RUN: echo 'SECTIONS { .rodata : {*(.rodata.*)} .text : {*(.text.*)} }' > %t1.lds
-# RUN: ld.lld -r -T %t1.lds %t.o %t.o -o %t1.ro
-# RUN: llvm-readelf -g -S %t1.ro | FileCheck %s --check-prefix=SCRIPT1
+# RUN: echo 'SECTIONS { .rodata : {*(.rodata.*)} .text : {*(.text.*)} }' > combine.lds
+# RUN: ld.lld -r -T combine.lds a.o a.o -o combine.ro
+# RUN: llvm-readelf -g -S combine.ro | FileCheck %s --check-prefix=COMBINE
-# SCRIPT1: Name Type Address Off Size ES Flg Lk Inf Al
-# SCRIPT1: .group GROUP 0000000000000000 {{.*}} 00000c 04 {{[1-9]}} [[#]] 4
+# COMBINE: Name Type Address Off Size ES Flg Lk Inf Al
+# COMBINE: .rodata PROGBITS 0000000000000000 {{.*}} 000002 00 AG 0 0 1
+# COMBINE-NEXT: .text PROGBITS 0000000000000000 {{.*}} 000010 00 AXG 0 0 4
+# COMBINE-NEXT: .group GROUP 0000000000000000 {{.*}} 000014 04 [[#]] [[#]] 4
+# COMBINE-NEXT: .rela.text RELA 0000000000000000 {{.*}} 000018 18 IG [[#]] [[#]] 8
+# COMBINE-NEXT: .rela.text RELA 0000000000000000 {{.*}} 000018 18 IG [[#]] [[#]] 8
+# COMBINE-NEXT: .note.GNU-stack
-# SCRIPT1: COMDAT group section [{{.*}}] `.group' [abc] contains 2 sections:
-# SCRIPT1-NEXT: Name
-# SCRIPT1-NEXT: .rodata
-# SCRIPT1-NEXT: .text
+# COMBINE: COMDAT group section [{{.*}}] `.group' [abc] contains 4 sections:
+# COMBINE-NEXT: Name
+# COMBINE-NEXT: .rodata
+# COMBINE-NEXT: .text
+# COMBINE-NEXT: .rela.text
+# COMBINE-NEXT: .rela.text
-# RUN: echo 'SECTIONS { /DISCARD/ : {*(.rodata.*)} }' > %t2.lds
-# RUN: ld.lld -r -T %t2.lds %t.o %t.o -o %t2.ro
-# RUN: llvm-readelf -g -S %t2.ro | FileCheck %s --check-prefix=SCRIPT2
+# RUN: echo 'SECTIONS { /DISCARD/ : {*(.rodata.*)} }' > discard-rodata.lds
+# RUN: ld.lld -r -T discard-rodata.lds a.o a.o -o discard-rodata.ro
+# RUN: llvm-readelf -g -S discard-rodata.ro | FileCheck %s --check-prefix=NO-RODATA
## Handle discarded group members.
-# SCRIPT2: [Nr] Name Type Address Off Size ES Flg Lk Inf Al
-# SCRIPT2: [ 2] .group GROUP 0000000000000000 {{.*}} 00000c 04 {{[1-9]}} [[#]] 4
+# NO-RODATA: Name Type Address Off Size ES Flg Lk Inf Al
+# NO-RODATA: .group GROUP 0000000000000000 {{.*}} 000014 04 [[#]] [[#]] 4
+# NO-RODATA-NEXT: .text.bar PROGBITS 0000000000000000 {{.*}} 000008 00 AXG 0 0 1
+# NO-RODATA-NEXT: .rela.text.bar RELA 0000000000000000 {{.*}} 000018 18 IG [[#]] [[#]] 8
+# NO-RODATA-NEXT: .text.foo PROGBITS 0000000000000000 {{.*}} 000008 00 AXG [[#]] [[#]] 1
+# NO-RODATA-NEXT: .rela.text.foo RELA 0000000000000000 {{.*}} 000018 18 IG [[#]] [[#]] 8
+# NO-RODATA-NEXT: .note.GNU-stack
-# SCRIPT2: COMDAT group section [{{.*}}] `.group' [abc] contains 2 sections:
-# SCRIPT2-NEXT: Name
-# SCRIPT2-NEXT: .text.bar
-# SCRIPT2-NEXT: .text.foo
+# NO-RODATA: COMDAT group section [{{.*}}] `.group' [abc] contains 4 sections:
+# NO-RODATA-NEXT: Name
+# NO-RODATA-NEXT: .text.bar
+# NO-RODATA-NEXT: .rela.text.bar
+# NO-RODATA-NEXT: .text.foo
+# NO-RODATA-NEXT: .rela.text.foo
+#--- a.s
.weak abc
abc:
@@ -52,6 +77,6 @@ abc:
.byte 42
.section .text.bar,"axG",@progbits,abc,comdat
-.quad 42
+.quad abc
.section .text.foo,"axG",@progbits,abc,comdat
-.long 42
+.quad abc
diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py
index 7c6c60e518d7..1c0d717ce455 100755
--- a/lldb/examples/python/crashlog.py
+++ b/lldb/examples/python/crashlog.py
@@ -547,9 +547,9 @@ class CrashLog(symbolication.Symbolicator):
for image in self.images:
image.resolve = True
elif options.crashed_only:
+ images_to_load = []
for thread in self.threads:
- if thread.did_crash():
- images_to_load = []
+ if thread.did_crash() or thread.app_specific_backtrace:
for ident in thread.idents:
for image in self.find_images_with_identifier(ident):
image.resolve = True
@@ -864,7 +864,7 @@ class JSONCrashLogParser(CrashLogParser):
thread = self.crashlog.Thread(
len(self.crashlog.threads), True, self.crashlog.process_arch
)
- thread.queue = "Application Specific Backtrace"
+ thread.name = "Application Specific Backtrace"
if self.parse_asi_backtrace(thread, json_app_specific_bts[0]):
self.crashlog.threads.append(thread)
else:
@@ -874,7 +874,7 @@ class JSONCrashLogParser(CrashLogParser):
thread = self.crashlog.Thread(
len(self.crashlog.threads), True, self.crashlog.process_arch
)
- thread.queue = "Last Exception Backtrace"
+ thread.name = "Last Exception Backtrace"
self.parse_frames(thread, json_last_exc_bts)
self.crashlog.threads.append(thread)
@@ -1174,11 +1174,13 @@ class TextCrashLogParser(CrashLogParser):
self.thread = self.crashlog.Thread(
idx, True, self.crashlog.process_arch
)
+ self.thread.name = "Application Specific Backtrace"
elif line.startswith("Last Exception Backtrace:"): # iOS
self.parse_mode = self.CrashLogParseMode.THREAD
self.app_specific_backtrace = True
idx = 1
self.thread = self.crashlog.Thread(idx, True, self.crashlog.process_arch)
+ self.thread.name = "Last Exception Backtrace"
self.crashlog.info_lines.append(line.strip())
def parse_thread(self, line):
@@ -1812,6 +1814,9 @@ def SymbolicateCrashLogs(debugger, command_args, result, is_command):
)
)
+ if "NO_PARALLEL_IMG_LOADING" in os.environ:
+ options.no_parallel_image_loading = True
+
if options.version:
print(debugger.GetVersionString())
return
diff --git a/lldb/examples/python/crashlog_scripted_process.py b/lldb/examples/python/crashlog_scripted_process.py
index 2ee030239ee3..be0ed49d3590 100644
--- a/lldb/examples/python/crashlog_scripted_process.py
+++ b/lldb/examples/python/crashlog_scripted_process.py
@@ -173,10 +173,7 @@ class CrashLogScriptedThread(ScriptedThread):
self.backing_thread = crashlog_thread
self.idx = self.backing_thread.index
self.tid = self.backing_thread.id
- if self.backing_thread.app_specific_backtrace:
- self.name = "Application Specific Backtrace"
- else:
- self.name = self.backing_thread.name
+ self.name = self.backing_thread.name
self.queue = self.backing_thread.queue
self.has_crashed = self.originating_process.crashed_thread_idx == self.idx
self.create_stackframes()
diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp
index 4681dbafb6f9..05767a8d02ff 100644
--- a/lldb/source/Expression/DWARFExpression.cpp
+++ b/lldb/source/Expression/DWARFExpression.cpp
@@ -94,51 +94,38 @@ void DWARFExpression::SetRegisterKind(RegisterKind reg_kind) {
m_reg_kind = reg_kind;
}
-
-static bool ReadRegisterValueAsScalar(RegisterContext *reg_ctx,
- lldb::RegisterKind reg_kind,
- uint32_t reg_num, Status *error_ptr,
- Value &value) {
- if (reg_ctx == nullptr) {
- if (error_ptr)
- error_ptr->SetErrorString("No register context in frame.\n");
- } else {
- uint32_t native_reg =
- reg_ctx->ConvertRegisterKindToRegisterNumber(reg_kind, reg_num);
- if (native_reg == LLDB_INVALID_REGNUM) {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat("Unable to convert register "
- "kind=%u reg_num=%u to a native "
- "register number.\n",
- reg_kind, reg_num);
- } else {
- const RegisterInfo *reg_info =
- reg_ctx->GetRegisterInfoAtIndex(native_reg);
- RegisterValue reg_value;
- if (reg_ctx->ReadRegister(reg_info, reg_value)) {
- if (reg_value.GetScalarValue(value.GetScalar())) {
- value.SetValueType(Value::ValueType::Scalar);
- value.SetContext(Value::ContextType::RegisterInfo,
- const_cast<RegisterInfo *>(reg_info));
- if (error_ptr)
- error_ptr->Clear();
- return true;
- } else {
- // If we get this error, then we need to implement a value buffer in
- // the dwarf expression evaluation function...
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "register %s can't be converted to a scalar value",
- reg_info->name);
- }
- } else {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat("register %s is not available",
- reg_info->name);
- }
+static llvm::Error ReadRegisterValueAsScalar(RegisterContext *reg_ctx,
+ lldb::RegisterKind reg_kind,
+ uint32_t reg_num, Value &value) {
+ if (reg_ctx == nullptr)
+ return llvm::createStringError("no register context in frame");
+
+ const uint32_t native_reg =
+ reg_ctx->ConvertRegisterKindToRegisterNumber(reg_kind, reg_num);
+ if (native_reg == LLDB_INVALID_REGNUM)
+ return llvm::createStringError(
+ "unable to convert register kind=%u reg_num=%u to a native "
+ "register number",
+ reg_kind, reg_num);
+
+ const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoAtIndex(native_reg);
+ RegisterValue reg_value;
+ if (reg_ctx->ReadRegister(reg_info, reg_value)) {
+ if (reg_value.GetScalarValue(value.GetScalar())) {
+ value.SetValueType(Value::ValueType::Scalar);
+ value.SetContext(Value::ContextType::RegisterInfo,
+ const_cast<RegisterInfo *>(reg_info));
+ return llvm::Error::success();
}
+
+ // If we get this error, then we need to implement a value buffer in
+ // the dwarf expression evaluation function...
+ return llvm::createStringError(
+ "register %s can't be converted to a scalar value", reg_info->name);
}
- return false;
+
+ return llvm::createStringError("register %s is not available",
+ reg_info->name);
}
/// Return the length in bytes of the set of operands for \p op. No guarantees
@@ -782,7 +769,6 @@ void UpdateValueTypeFromLocationDescription(Log *log, const DWARFUnit *dwarf_cu,
///
/// \param exe_ctx Pointer to the execution context
/// \param module_sp shared_ptr contains the module if we have one
-/// \param error_ptr pointer to Status object if we have one
/// \param dw_op_type C-style string used to vary the error output
/// \param file_addr the file address we are trying to resolve and turn into a
/// load address
@@ -793,32 +779,22 @@ void UpdateValueTypeFromLocationDescription(Log *log, const DWARFUnit *dwarf_cu,
/// the load address succeed or an empty Optinal otherwise. If
/// check_sectionoffset is true we consider LLDB_INVALID_ADDRESS a
/// success if so_addr.IsSectionOffset() is true.
-static std::optional<lldb::addr_t>
+static llvm::Expected<lldb::addr_t>
ResolveLoadAddress(ExecutionContext *exe_ctx, lldb::ModuleSP &module_sp,
- Status *error_ptr, const char *dw_op_type,
- lldb::addr_t file_addr, Address &so_addr,
- bool check_sectionoffset = false) {
- if (!module_sp) {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "need module to resolve file address for %s", dw_op_type);
- return {};
- }
+ const char *dw_op_type, lldb::addr_t file_addr,
+ Address &so_addr, bool check_sectionoffset = false) {
+ if (!module_sp)
+ return llvm::createStringError("need module to resolve file address for %s",
+ dw_op_type);
- if (!module_sp->ResolveFileAddress(file_addr, so_addr)) {
- if (error_ptr)
- error_ptr->SetErrorString("failed to resolve file address in module");
- return {};
- }
+ if (!module_sp->ResolveFileAddress(file_addr, so_addr))
+ return llvm::createStringError("failed to resolve file address in module");
- addr_t load_addr = so_addr.GetLoadAddress(exe_ctx->GetTargetPtr());
+ const addr_t load_addr = so_addr.GetLoadAddress(exe_ctx->GetTargetPtr());
if (load_addr == LLDB_INVALID_ADDRESS &&
- (check_sectionoffset && !so_addr.IsSectionOffset())) {
- if (error_ptr)
- error_ptr->SetErrorString("failed to resolve load address");
- return {};
- }
+ (check_sectionoffset && !so_addr.IsSectionOffset()))
+ return llvm::createStringError("failed to resolve load address");
return load_addr;
}
@@ -988,12 +964,11 @@ llvm::Expected<Value> DWARFExpression::Evaluate(
LLDB_INVALID_ADDRESS);
Address so_addr;
- Status load_err;
auto maybe_load_addr = ResolveLoadAddress(
- exe_ctx, module_sp, &load_err, "DW_OP_deref", file_addr, so_addr);
+ exe_ctx, module_sp, "DW_OP_deref", file_addr, so_addr);
if (!maybe_load_addr)
- return load_err.ToError();
+ return maybe_load_addr.takeError();
stack.back().GetScalar() = *maybe_load_addr;
// Fall through to load address promotion code below.
@@ -1105,14 +1080,12 @@ llvm::Expected<Value> DWARFExpression::Evaluate(
auto file_addr =
stack.back().GetScalar().ULongLong(LLDB_INVALID_ADDRESS);
Address so_addr;
- Status resolve_err;
- auto maybe_load_addr =
- ResolveLoadAddress(exe_ctx, module_sp, &resolve_err,
- "DW_OP_deref_size", file_addr, so_addr,
- /*check_sectionoffset=*/true);
+ auto maybe_load_addr = ResolveLoadAddress(
+ exe_ctx, module_sp, "DW_OP_deref_size", file_addr, so_addr,
+ /*check_sectionoffset=*/true);
if (!maybe_load_addr)
- return resolve_err.ToError();
+ return maybe_load_addr.takeError();
addr_t load_addr = *maybe_load_addr;
@@ -1832,11 +1805,10 @@ llvm::Expected<Value> DWARFExpression::Evaluate(
dwarf4_location_description_kind = Register;
reg_num = op - DW_OP_reg0;
- Status read_err;
- if (ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, &read_err, tmp))
- stack.push_back(tmp);
- else
- return read_err.ToError();
+ if (llvm::Error err =
+ ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, tmp))
+ return err;
+ stack.push_back(tmp);
} break;
// OPCODE: DW_OP_regx
// OPERANDS:
@@ -1846,10 +1818,10 @@ llvm::Expected<Value> DWARFExpression::Evaluate(
dwarf4_location_description_kind = Register;
reg_num = opcodes.GetULEB128(&offset);
Status read_err;
- if (ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, &read_err, tmp))
- stack.push_back(tmp);
- else
- return read_err.ToError();
+ if (llvm::Error err =
+ ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, tmp))
+ return err;
+ stack.push_back(tmp);
} break;
// OPCODE: DW_OP_bregN
@@ -1890,17 +1862,15 @@ llvm::Expected<Value> DWARFExpression::Evaluate(
case DW_OP_breg30:
case DW_OP_breg31: {
reg_num = op - DW_OP_breg0;
-
- Status read_err;
- if (ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, &read_err,
- tmp)) {
- int64_t breg_offset = opcodes.GetSLEB128(&offset);
- tmp.ResolveValue(exe_ctx) += (uint64_t)breg_offset;
- tmp.ClearContext();
- stack.push_back(tmp);
- stack.back().SetValueType(Value::ValueType::LoadAddress);
- } else
- return read_err.ToError();
+ if (llvm::Error err =
+ ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, tmp))
+ return err;
+
+ int64_t breg_offset = opcodes.GetSLEB128(&offset);
+ tmp.ResolveValue(exe_ctx) += (uint64_t)breg_offset;
+ tmp.ClearContext();
+ stack.push_back(tmp);
+ stack.back().SetValueType(Value::ValueType::LoadAddress);
} break;
// OPCODE: DW_OP_bregx
// OPERANDS: 2
@@ -1910,17 +1880,15 @@ llvm::Expected<Value> DWARFExpression::Evaluate(
// N plus an offset.
case DW_OP_bregx: {
reg_num = opcodes.GetULEB128(&offset);
-
- Status read_err;
- if (ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, &read_err,
- tmp)) {
- int64_t breg_offset = opcodes.GetSLEB128(&offset);
- tmp.ResolveValue(exe_ctx) += (uint64_t)breg_offset;
- tmp.ClearContext();
- stack.push_back(tmp);
- stack.back().SetValueType(Value::ValueType::LoadAddress);
- } else
- return read_err.ToError();
+ if (llvm::Error err =
+ ReadRegisterValueAsScalar(reg_ctx, reg_kind, reg_num, tmp))
+ return err;
+
+ int64_t breg_offset = opcodes.GetSLEB128(&offset);
+ tmp.ResolveValue(exe_ctx) += (uint64_t)breg_offset;
+ tmp.ClearContext();
+ stack.push_back(tmp);
+ stack.back().SetValueType(Value::ValueType::LoadAddress);
} break;
case DW_OP_fbreg:
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h
index e144cf0f9bd9..66db396279e0 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h
@@ -60,8 +60,6 @@ public:
virtual ConstString GetDIEClassTemplateParams(const DWARFDIE &die) = 0;
- virtual lldb_private::Type *FindDefinitionTypeForDIE(const DWARFDIE &die) = 0;
-
static std::optional<SymbolFile::ArrayInfo>
ParseChildArrayInfo(const DWARFDIE &parent_die,
const ExecutionContext *exe_ctx = nullptr);
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 7d7e835c3d73..579a538af363 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -154,26 +154,6 @@ static bool TagIsRecordType(dw_tag_t tag) {
}
}
-static bool IsForwardDeclaration(const DWARFDIE &die,
- const ParsedDWARFTypeAttributes &attrs,
- LanguageType cu_language) {
- if (attrs.is_forward_declaration)
- return true;
-
- // Work around an issue with clang at the moment where forward
- // declarations for objective C classes are emitted as:
- // DW_TAG_structure_type [2]
- // DW_AT_name( "ForwardObjcClass" )
- // DW_AT_byte_size( 0x00 )
- // DW_AT_decl_file( "..." )
- // DW_AT_decl_line( 1 )
- //
- // Note that there is no DW_AT_declaration and there are no children,
- // and the byte size is zero.
- return attrs.byte_size && *attrs.byte_size == 0 && attrs.name &&
- !die.HasChildren() && cu_language == eLanguageTypeObjC;
-}
-
TypeSP DWARFASTParserClang::ParseTypeFromClangModule(const SymbolContext &sc,
const DWARFDIE &die,
Log *log) {
@@ -269,9 +249,11 @@ static void ForcefullyCompleteType(CompilerType type) {
/// This function serves a similar purpose as RequireCompleteType above, but it
/// avoids completing the type if it is not immediately necessary. It only
/// ensures we _can_ complete the type later.
-void DWARFASTParserClang::PrepareContextToReceiveMembers(
- clang::DeclContext *decl_ctx, const DWARFDIE &decl_ctx_die,
- const DWARFDIE &die, const char *type_name_cstr) {
+static void PrepareContextToReceiveMembers(TypeSystemClang &ast,
+ ClangASTImporter &ast_importer,
+ clang::DeclContext *decl_ctx,
+ DWARFDIE die,
+ const char *type_name_cstr) {
auto *tag_decl_ctx = clang::dyn_cast<clang::TagDecl>(decl_ctx);
if (!tag_decl_ctx)
return; // Non-tag context are always ready.
@@ -286,8 +268,7 @@ void DWARFASTParserClang::PrepareContextToReceiveMembers(
// gmodules case), we can complete the type by doing a full import.
// If this type was not imported from an external AST, there's nothing to do.
- CompilerType type = m_ast.GetTypeForDecl(tag_decl_ctx);
- ClangASTImporter &ast_importer = GetClangASTImporter();
+ CompilerType type = ast.GetTypeForDecl(tag_decl_ctx);
if (type && ast_importer.CanImport(type)) {
auto qual_type = ClangUtil::GetQualType(type);
if (ast_importer.RequireCompleteType(qual_type))
@@ -298,13 +279,6 @@ void DWARFASTParserClang::PrepareContextToReceiveMembers(
type_name_cstr ? type_name_cstr : "", die.GetOffset());
}
- // By searching for the definition DIE of the decl_ctx type, we will either:
- // 1. Found the the definition DIE and start its definition with
- // TypeSystemClang::StartTagDeclarationDefinition.
- // 2. Unable to find it, then need to forcefully complete it.
- FindDefinitionTypeForDIE(decl_ctx_die);
- if (tag_decl_ctx->isCompleteDefinition() || tag_decl_ctx->isBeingDefined())
- return;
// We don't have a type definition and/or the import failed. We must
// forcefully complete the type to avoid crashes.
ForcefullyCompleteType(type);
@@ -650,11 +624,10 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc,
if (tag == DW_TAG_typedef) {
// DeclContext will be populated when the clang type is materialized in
// Type::ResolveCompilerType.
- DWARFDIE decl_ctx_die;
- clang::DeclContext *decl_ctx =
- GetClangDeclContextContainingDIE(die, &decl_ctx_die);
- PrepareContextToReceiveMembers(decl_ctx, decl_ctx_die, die,
- attrs.name.GetCString());
+ PrepareContextToReceiveMembers(
+ m_ast, GetClangASTImporter(),
+ GetClangDeclContextContainingDIE(die, nullptr), die,
+ attrs.name.GetCString());
if (attrs.type.IsValid()) {
// Try to parse a typedef from the (DWARF embedded in the) Clang
@@ -1134,6 +1107,32 @@ DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die,
// struct and see if this is actually a C++ method
Type *class_type = dwarf->ResolveType(decl_ctx_die);
if (class_type) {
+ if (class_type->GetID() != decl_ctx_die.GetID() ||
+ IsClangModuleFwdDecl(decl_ctx_die)) {
+
+ // We uniqued the parent class of this function to another
+ // class so we now need to associate all dies under
+ // "decl_ctx_die" to DIEs in the DIE for "class_type"...
+ DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID());
+
+ if (class_type_die) {
+ std::vector<DWARFDIE> failures;
+
+ CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die,
+ class_type, failures);
+
+ // FIXME do something with these failures that's
+ // smarter than just dropping them on the ground.
+ // Unfortunately classes don't like having stuff added
+ // to them after their definitions are complete...
+
+ Type *type_ptr = dwarf->GetDIEToType()[die.GetDIE()];
+ if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) {
+ return type_ptr->shared_from_this();
+ }
+ }
+ }
+
if (attrs.specification.IsValid()) {
// We have a specification which we are going to base our
// function prototype off of, so we need this type to be
@@ -1268,39 +1267,6 @@ DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die,
}
}
}
- // By here, we should have already completed the c++ class_type
- // because if either specification or abstract_origin is present, we
- // call GetClangDeclContextForDIE to resolve the DW_TAG_subprogram
- // refered by this one until we reached the DW_TAG_subprogram without
- // specification or abstract_origin (the else branch above). Then the
- // above GetFullCompilerType() will complete the class_type if it's
- // not completed yet. After that, we will have the mapping from DIEs
- // in class_type_die to DeclContexts in m_die_to_decl_ctx.
- if (class_type->GetID() != decl_ctx_die.GetID() ||
- IsClangModuleFwdDecl(decl_ctx_die)) {
-
- // We uniqued the parent class of this function to another
- // class so we now need to associate all dies under
- // "decl_ctx_die" to DIEs in the DIE for "class_type"...
- DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID());
-
- if (class_type_die) {
- std::vector<DWARFDIE> failures;
-
- CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die,
- class_type, failures);
-
- // FIXME do something with these failures that's
- // smarter than just dropping them on the ground.
- // Unfortunately classes don't like having stuff added
- // to them after their definitions are complete...
-
- Type *type_ptr = dwarf->GetDIEToType()[die.GetDIE()];
- if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) {
- return type_ptr->shared_from_this();
- }
- }
- }
}
}
}
@@ -1673,93 +1639,6 @@ DWARFASTParserClang::GetCPlusPlusQualifiedName(const DWARFDIE &die) {
return qualified_name;
}
-lldb_private::Type *
-DWARFASTParserClang::FindDefinitionTypeForDIE(const DWARFDIE &die) {
- SymbolFileDWARF *dwarf = die.GetDWARF();
- ParsedDWARFTypeAttributes attrs(die);
- bool is_forward_declaration = IsForwardDeclaration(
- die, attrs, SymbolFileDWARF::GetLanguage(*die.GetCU()));
- if (!is_forward_declaration)
- return dwarf->GetDIEToType()[die.GetDIE()];
-
- const dw_tag_t tag = die.Tag();
- TypeSP type_sp;
- Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups);
- if (log) {
- dwarf->GetObjectFile()->GetModule()->LogMessage(
- log,
- "SymbolFileDWARF({0:p}) - {1:x16}: {2} type \"{3}\" is a "
- "forward declaration DIE, trying to find definition DIE",
- static_cast<void *>(this), die.GetOffset(), DW_TAG_value_to_name(tag),
- attrs.name.GetCString());
- }
- // We haven't parse definition die for this type, starting to search for it.
- // After we found the definition die, the GetDeclarationDIEToDefinitionDIE()
- // map will have the new mapping from this declaration die to definition die.
- if (attrs.class_language == eLanguageTypeObjC ||
- attrs.class_language == eLanguageTypeObjC_plus_plus) {
- if (!attrs.is_complete_objc_class &&
- die.Supports_DW_AT_APPLE_objc_complete_type()) {
- // We have a valid eSymbolTypeObjCClass class symbol whose name
- // matches the current objective C class that we are trying to find
- // and this DIE isn't the complete definition (we checked
- // is_complete_objc_class above and know it is false), so the real
- // definition is in here somewhere
- type_sp =
- dwarf->FindCompleteObjCDefinitionTypeForDIE(die, attrs.name, true);
-
- if (!type_sp) {
- SymbolFileDWARFDebugMap *debug_map_symfile =
- dwarf->GetDebugMapSymfile();
- if (debug_map_symfile) {
- // We weren't able to find a full declaration in this DWARF,
- // see if we have a declaration anywhere else...
- type_sp = debug_map_symfile->FindCompleteObjCDefinitionTypeForDIE(
- die, attrs.name, true);
- }
- }
-
- if (type_sp && log) {
- dwarf->GetObjectFile()->GetModule()->LogMessage(
- log,
- "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is an "
- "incomplete objc type, complete type is {5:x8}",
- static_cast<void *>(this), die.GetOffset(),
- DW_TAG_value_to_name(tag), tag, attrs.name.GetCString(),
- type_sp->GetID());
- }
- }
- }
-
- type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die);
- if (!type_sp) {
- SymbolFileDWARFDebugMap *debug_map_symfile = dwarf->GetDebugMapSymfile();
- if (debug_map_symfile) {
- // We weren't able to find a full declaration in this DWARF, see
- // if we have a declaration anywhere else...
- type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(die);
- }
- if (type_sp && log) {
- dwarf->GetObjectFile()->GetModule()->LogMessage(
- log,
- "SymbolFileDWARF({0:p}) - {1:x16}: {2} type \"{3}\" is a "
- "forward declaration, complete type is {4:x8}",
- static_cast<void *>(this), die.GetOffset(), DW_TAG_value_to_name(tag),
- attrs.name.GetCString(), type_sp->GetID());
- }
- }
-
- if (!type_sp && log) {
- dwarf->GetObjectFile()->GetModule()->LogMessage(
- log,
- "SymbolFileDWARF({0:p}) - {1:x16}: {2} type \"{3}\" is a "
- "forward declaration, unable to find definition DIE for it",
- static_cast<void *>(this), die.GetOffset(), DW_TAG_value_to_name(tag),
- attrs.name.GetCString());
- }
- return type_sp.get();
-}
-
TypeSP
DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
const DWARFDIE &die,
@@ -1771,10 +1650,14 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
LanguageType cu_language = SymbolFileDWARF::GetLanguage(*die.GetCU());
Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups);
+ // UniqueDWARFASTType is large, so don't create a local variables on the
+ // stack, put it on the heap. This function is often called recursively and
+ // clang isn't good at sharing the stack space for variables in different
+ // blocks.
+ auto unique_ast_entry_up = std::make_unique<UniqueDWARFASTType>();
+
ConstString unique_typename(attrs.name);
Declaration unique_decl(attrs.decl);
- uint64_t byte_size = attrs.byte_size.value_or(0);
- attrs.is_forward_declaration = IsForwardDeclaration(die, attrs, cu_language);
if (attrs.name) {
if (Language::LanguageIsCPlusPlus(cu_language)) {
@@ -1787,42 +1670,14 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
unique_decl.Clear();
}
- if (UniqueDWARFASTType *unique_ast_entry_type =
- dwarf->GetUniqueDWARFASTTypeMap().Find(
- unique_typename, die, unique_decl, byte_size,
- attrs.is_forward_declaration)) {
- type_sp = unique_ast_entry_type->m_type_sp;
+ if (dwarf->GetUniqueDWARFASTTypeMap().Find(
+ unique_typename, die, unique_decl, attrs.byte_size.value_or(-1),
+ *unique_ast_entry_up)) {
+ type_sp = unique_ast_entry_up->m_type_sp;
if (type_sp) {
dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
LinkDeclContextToDIE(
- GetCachedClangDeclContextForDIE(unique_ast_entry_type->m_die), die);
- if (!attrs.is_forward_declaration) {
- // If the DIE being parsed in this function is a definition and the
- // entry in the map is a declaration, then we need to update the entry
- // to point to the definition DIE.
- if (unique_ast_entry_type->m_is_forward_declaration) {
- unique_ast_entry_type->m_die = die;
- unique_ast_entry_type->m_byte_size = byte_size;
- unique_ast_entry_type->m_declaration = unique_decl;
- unique_ast_entry_type->m_is_forward_declaration = false;
- // Need to update Type ID to refer to the definition DIE. because
- // it's used in ParseSubroutine to determine if we need to copy cxx
- // method types from a declaration DIE to this definition DIE.
- type_sp->SetID(die.GetID());
- clang_type = type_sp->GetForwardCompilerType();
- if (attrs.class_language != eLanguageTypeObjC &&
- attrs.class_language != eLanguageTypeObjC_plus_plus)
- TypeSystemClang::StartTagDeclarationDefinition(clang_type);
-
- CompilerType compiler_type_no_qualifiers =
- ClangUtil::RemoveFastQualifiers(clang_type);
- auto result = dwarf->GetForwardDeclCompilerTypeToDIE().try_emplace(
- compiler_type_no_qualifiers.GetOpaqueQualType(),
- *die.GetDIERef());
- if (!result.second)
- result.first->second = *die.GetDIERef();
- }
- }
+ GetCachedClangDeclContextForDIE(unique_ast_entry_up->m_die), die);
return type_sp;
}
}
@@ -1844,21 +1699,125 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
default_accessibility = eAccessPrivate;
}
+ if (attrs.byte_size && *attrs.byte_size == 0 && attrs.name &&
+ !die.HasChildren() && cu_language == eLanguageTypeObjC) {
+ // Work around an issue with clang at the moment where forward
+ // declarations for objective C classes are emitted as:
+ // DW_TAG_structure_type [2]
+ // DW_AT_name( "ForwardObjcClass" )
+ // DW_AT_byte_size( 0x00 )
+ // DW_AT_decl_file( "..." )
+ // DW_AT_decl_line( 1 )
+ //
+ // Note that there is no DW_AT_declaration and there are no children,
+ // and the byte size is zero.
+ attrs.is_forward_declaration = true;
+ }
+
+ if (attrs.class_language == eLanguageTypeObjC ||
+ attrs.class_language == eLanguageTypeObjC_plus_plus) {
+ if (!attrs.is_complete_objc_class &&
+ die.Supports_DW_AT_APPLE_objc_complete_type()) {
+ // We have a valid eSymbolTypeObjCClass class symbol whose name
+ // matches the current objective C class that we are trying to find
+ // and this DIE isn't the complete definition (we checked
+ // is_complete_objc_class above and know it is false), so the real
+ // definition is in here somewhere
+ type_sp =
+ dwarf->FindCompleteObjCDefinitionTypeForDIE(die, attrs.name, true);
+
+ if (!type_sp) {
+ SymbolFileDWARFDebugMap *debug_map_symfile =
+ dwarf->GetDebugMapSymfile();
+ if (debug_map_symfile) {
+ // We weren't able to find a full declaration in this DWARF,
+ // see if we have a declaration anywhere else...
+ type_sp = debug_map_symfile->FindCompleteObjCDefinitionTypeForDIE(
+ die, attrs.name, true);
+ }
+ }
+
+ if (type_sp) {
+ if (log) {
+ dwarf->GetObjectFile()->GetModule()->LogMessage(
+ log,
+ "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is an "
+ "incomplete objc type, complete type is {5:x8}",
+ static_cast<void *>(this), die.GetOffset(),
+ DW_TAG_value_to_name(tag), tag, attrs.name.GetCString(),
+ type_sp->GetID());
+ }
+
+ // We found a real definition for this type elsewhere so lets use
+ // it and cache the fact that we found a complete type for this
+ // die
+ dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
+ return type_sp;
+ }
+ }
+ }
+
if (attrs.is_forward_declaration) {
+ // We have a forward declaration to a type and we need to try and
+ // find a full declaration. We look in the current type index just in
+ // case we have a forward declaration followed by an actual
+ // declarations in the DWARF. If this fails, we need to look
+ // elsewhere...
+ if (log) {
+ dwarf->GetObjectFile()->GetModule()->LogMessage(
+ log,
+ "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is a "
+ "forward declaration, trying to find complete type",
+ static_cast<void *>(this), die.GetOffset(), DW_TAG_value_to_name(tag),
+ tag, attrs.name.GetCString());
+ }
+
// See if the type comes from a Clang module and if so, track down
// that type.
type_sp = ParseTypeFromClangModule(sc, die, log);
if (type_sp)
return type_sp;
- }
+ // type_sp = FindDefinitionTypeForDIE (dwarf_cu, die,
+ // type_name_const_str);
+ type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die);
+
+ if (!type_sp) {
+ SymbolFileDWARFDebugMap *debug_map_symfile = dwarf->GetDebugMapSymfile();
+ if (debug_map_symfile) {
+ // We weren't able to find a full declaration in this DWARF, see
+ // if we have a declaration anywhere else...
+ type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(die);
+ }
+ }
+
+ if (type_sp) {
+ if (log) {
+ dwarf->GetObjectFile()->GetModule()->LogMessage(
+ log,
+ "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is a "
+ "forward declaration, complete type is {5:x8}",
+ static_cast<void *>(this), die.GetOffset(),
+ DW_TAG_value_to_name(tag), tag, attrs.name.GetCString(),
+ type_sp->GetID());
+ }
+
+ // We found a real definition for this type elsewhere so lets use
+ // it and cache the fact that we found a complete type for this die
+ dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
+ clang::DeclContext *defn_decl_ctx =
+ GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID()));
+ if (defn_decl_ctx)
+ LinkDeclContextToDIE(defn_decl_ctx, die);
+ return type_sp;
+ }
+ }
assert(tag_decl_kind != -1);
UNUSED_IF_ASSERT_DISABLED(tag_decl_kind);
- DWARFDIE decl_ctx_die;
- clang::DeclContext *decl_ctx =
- GetClangDeclContextContainingDIE(die, &decl_ctx_die);
+ bool clang_type_was_created = false;
+ clang::DeclContext *decl_ctx = GetClangDeclContextContainingDIE(die, nullptr);
- PrepareContextToReceiveMembers(decl_ctx, decl_ctx_die, die,
+ PrepareContextToReceiveMembers(m_ast, GetClangASTImporter(), decl_ctx, die,
attrs.name.GetCString());
if (attrs.accessibility == eAccessNone && decl_ctx) {
@@ -1897,17 +1856,20 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
tag_decl_kind, template_param_infos);
clang_type =
m_ast.CreateClassTemplateSpecializationType(class_specialization_decl);
+ clang_type_was_created = true;
m_ast.SetMetadata(class_template_decl, metadata);
m_ast.SetMetadata(class_specialization_decl, metadata);
}
- if (!clang_type) {
+ if (!clang_type_was_created) {
+ clang_type_was_created = true;
clang_type = m_ast.CreateRecordType(
decl_ctx, GetOwningClangModule(die), attrs.accessibility,
attrs.name.GetCString(), tag_decl_kind, attrs.class_language, &metadata,
attrs.exports_symbols);
}
+
// Store a forward declaration to this class type in case any
// parameters in any class methods need it for the clang types for
// function prototypes.
@@ -1918,19 +1880,13 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
Type::ResolveState::Forward,
TypePayloadClang(OptionalClangModuleID(), attrs.is_complete_objc_class));
- // UniqueDWARFASTType is large, so don't create a local variables on the
- // stack, put it on the heap. This function is often called recursively and
- // clang isn't good at sharing the stack space for variables in different
- // blocks.
- auto unique_ast_entry_up = std::make_unique<UniqueDWARFASTType>();
// Add our type to the unique type map so we don't end up creating many
// copies of the same type over and over in the ASTContext for our
// module
unique_ast_entry_up->m_type_sp = type_sp;
unique_ast_entry_up->m_die = die;
unique_ast_entry_up->m_declaration = unique_decl;
- unique_ast_entry_up->m_byte_size = byte_size;
- unique_ast_entry_up->m_is_forward_declaration = attrs.is_forward_declaration;
+ unique_ast_entry_up->m_byte_size = attrs.byte_size.value_or(0);
dwarf->GetUniqueDWARFASTTypeMap().Insert(unique_typename,
*unique_ast_entry_up);
@@ -1971,7 +1927,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
GetClangASTImporter().SetRecordLayout(record_decl, layout);
}
}
- } else {
+ } else if (clang_type_was_created) {
// Start the definition if the class is not objective C since the
// underlying decls respond to isCompleteDefinition(). Objective
// C decls don't respond to isCompleteDefinition() so we can't
@@ -1983,21 +1939,26 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
if (attrs.class_language != eLanguageTypeObjC &&
attrs.class_language != eLanguageTypeObjC_plus_plus)
TypeSystemClang::StartTagDeclarationDefinition(clang_type);
+
+ // Leave this as a forward declaration until we need to know the
+ // details of the type. lldb_private::Type will automatically call
+ // the SymbolFile virtual function
+ // "SymbolFileDWARF::CompleteType(Type *)" When the definition
+ // needs to be defined.
+ assert(!dwarf->GetForwardDeclCompilerTypeToDIE().count(
+ ClangUtil::RemoveFastQualifiers(clang_type)
+ .GetOpaqueQualType()) &&
+ "Type already in the forward declaration map!");
+ // Can't assume m_ast.GetSymbolFile() is actually a
+ // SymbolFileDWARF, it can be a SymbolFileDWARFDebugMap for Apple
+ // binaries.
+ dwarf->GetForwardDeclCompilerTypeToDIE().try_emplace(
+ ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType(),
+ *die.GetDIERef());
+ m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), true);
}
}
- // If this is a declaration DIE, leave this as a forward declaration until we
- // need to know the details of the type. lldb_private::Type will automatically
- // call the SymbolFile virtual function "SymbolFileDWARF::CompleteType(Type
- // *)" When the definition needs to be defined.
- assert(!dwarf->GetForwardDeclCompilerTypeToDIE().count(
- ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType()) &&
- "Type already in the forward declaration map!");
- dwarf->GetForwardDeclCompilerTypeToDIE().try_emplace(
- ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType(),
- *die.GetDIERef());
- m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), true);
-
// If we made a clang type, set the trivial abi if applicable: We only
// do this for pass by value - which implies the Trivial ABI. There
// isn't a way to assert that something that would normally be pass by
@@ -2236,10 +2197,6 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die,
// For objective C we don't start the definition when the class is
// created.
TypeSystemClang::StartTagDeclarationDefinition(clang_type);
- } else {
- assert(clang_type.IsBeingDefined() &&
- "Trying to complete a definition without a prior call to "
- "StartTagDeclarationDefinition.");
}
AccessType default_accessibility = eAccessNone;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
index 853b8ccc3036..8d4af203bb28 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
@@ -42,40 +42,40 @@ struct ParsedDWARFTypeAttributes;
class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser {
public:
- typedef lldb_private::plugin::dwarf::DWARFDIE DWARFDIE;
-
DWARFASTParserClang(lldb_private::TypeSystemClang &ast);
~DWARFASTParserClang() override;
// DWARFASTParser interface.
- lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc,
- const DWARFDIE &die,
- bool *type_is_new_ptr) override;
+ lldb::TypeSP
+ ParseTypeFromDWARF(const lldb_private::SymbolContext &sc,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ bool *type_is_new_ptr) override;
- lldb_private::ConstString
- ConstructDemangledNameFromDWARF(const DWARFDIE &die) override;
+ lldb_private::ConstString ConstructDemangledNameFromDWARF(
+ const lldb_private::plugin::dwarf::DWARFDIE &die) override;
lldb_private::Function *
ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit,
- const DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
const lldb_private::AddressRange &func_range) override;
bool
- CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type,
+ CompleteTypeFromDWARF(const lldb_private::plugin::dwarf::DWARFDIE &die,
+ lldb_private::Type *type,
lldb_private::CompilerType &compiler_type) override;
- lldb_private::CompilerDecl
- GetDeclForUIDFromDWARF(const DWARFDIE &die) override;
+ lldb_private::CompilerDecl GetDeclForUIDFromDWARF(
+ const lldb_private::plugin::dwarf::DWARFDIE &die) override;
void EnsureAllDIEsInDeclContextHaveBeenParsed(
lldb_private::CompilerDeclContext decl_context) override;
- lldb_private::CompilerDeclContext
- GetDeclContextForUIDFromDWARF(const DWARFDIE &die) override;
+ lldb_private::CompilerDeclContext GetDeclContextForUIDFromDWARF(
+ const lldb_private::plugin::dwarf::DWARFDIE &die) override;
- lldb_private::CompilerDeclContext
- GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die) override;
+ lldb_private::CompilerDeclContext GetDeclContextContainingUIDFromDWARF(
+ const lldb_private::plugin::dwarf::DWARFDIE &die) override;
lldb_private::ClangASTImporter &GetClangASTImporter();
@@ -105,13 +105,8 @@ public:
/// \return A string, including surrounding '<>', of the template parameters.
/// If the DIE's name already has '<>', returns an empty ConstString because
/// it's assumed that the caller is using the DIE name anyway.
- lldb_private::ConstString
- GetDIEClassTemplateParams(const DWARFDIE &die) override;
-
- // Searching for definition DIE for the given DIE and return the type
- // associated with the definition DIE, or nullptr if definition DIE is not
- // found.
- lldb_private::Type *FindDefinitionTypeForDIE(const DWARFDIE &die) override;
+ lldb_private::ConstString GetDIEClassTemplateParams(
+ const lldb_private::plugin::dwarf::DWARFDIE &die) override;
protected:
/// Protected typedefs and members.
@@ -123,7 +118,8 @@ protected:
const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *,
clang::DeclContext *>
DIEToDeclContextMap;
- typedef std::multimap<const clang::DeclContext *, const DWARFDIE>
+ typedef std::multimap<const clang::DeclContext *,
+ const lldb_private::plugin::dwarf::DWARFDIE>
DeclContextToDIEMap;
typedef llvm::DenseMap<
const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *,
@@ -141,11 +137,14 @@ protected:
std::unique_ptr<lldb_private::ClangASTImporter> m_clang_ast_importer_up;
/// @}
- clang::DeclContext *GetDeclContextForBlock(const DWARFDIE &die);
+ clang::DeclContext *
+ GetDeclContextForBlock(const lldb_private::plugin::dwarf::DWARFDIE &die);
- clang::BlockDecl *ResolveBlockDIE(const DWARFDIE &die);
+ clang::BlockDecl *
+ ResolveBlockDIE(const lldb_private::plugin::dwarf::DWARFDIE &die);
- clang::NamespaceDecl *ResolveNamespaceDIE(const DWARFDIE &die);
+ clang::NamespaceDecl *
+ ResolveNamespaceDIE(const lldb_private::plugin::dwarf::DWARFDIE &die);
/// Returns the namespace decl that a DW_TAG_imported_declaration imports.
///
@@ -156,86 +155,96 @@ protected:
/// 'die' imports. If the imported entity is not a namespace
/// or another import declaration, returns nullptr. If an error
/// occurs, returns nullptr.
- clang::NamespaceDecl *ResolveImportedDeclarationDIE(const DWARFDIE &die);
+ clang::NamespaceDecl *ResolveImportedDeclarationDIE(
+ const lldb_private::plugin::dwarf::DWARFDIE &die);
- bool ParseTemplateDIE(const DWARFDIE &die,
+ bool ParseTemplateDIE(const lldb_private::plugin::dwarf::DWARFDIE &die,
lldb_private::TypeSystemClang::TemplateParameterInfos
&template_param_infos);
bool ParseTemplateParameterInfos(
- const DWARFDIE &parent_die,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
lldb_private::TypeSystemClang::TemplateParameterInfos
&template_param_infos);
- std::string GetCPlusPlusQualifiedName(const DWARFDIE &die);
+ std::string
+ GetCPlusPlusQualifiedName(const lldb_private::plugin::dwarf::DWARFDIE &die);
bool ParseChildMembers(
- const DWARFDIE &die, lldb_private::CompilerType &class_compiler_type,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ lldb_private::CompilerType &class_compiler_type,
std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> &base_classes,
- std::vector<DWARFDIE> &member_function_dies,
- std::vector<DWARFDIE> &contained_type_dies,
+ std::vector<lldb_private::plugin::dwarf::DWARFDIE> &member_function_dies,
+ std::vector<lldb_private::plugin::dwarf::DWARFDIE> &contained_type_dies,
DelayedPropertyList &delayed_properties,
const lldb::AccessType default_accessibility,
lldb_private::ClangASTImporter::LayoutInfo &layout_info);
size_t
ParseChildParameters(clang::DeclContext *containing_decl_ctx,
- const DWARFDIE &parent_die, bool skip_artificial,
- bool &is_static, bool &is_variadic,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
+ bool skip_artificial, bool &is_static, bool &is_variadic,
bool &has_template_params,
std::vector<lldb_private::CompilerType> &function_args,
std::vector<clang::ParmVarDecl *> &function_param_decls,
unsigned &type_quals);
- size_t ParseChildEnumerators(lldb_private::CompilerType &compiler_type,
- bool is_signed, uint32_t enumerator_byte_size,
- const DWARFDIE &parent_die);
+ size_t ParseChildEnumerators(
+ lldb_private::CompilerType &compiler_type, bool is_signed,
+ uint32_t enumerator_byte_size,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die);
/// Parse a structure, class, or union type DIE.
- lldb::TypeSP ParseStructureLikeDIE(const lldb_private::SymbolContext &sc,
- const DWARFDIE &die,
- ParsedDWARFTypeAttributes &attrs);
+ lldb::TypeSP
+ ParseStructureLikeDIE(const lldb_private::SymbolContext &sc,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ ParsedDWARFTypeAttributes &attrs);
- clang::Decl *GetClangDeclForDIE(const DWARFDIE &die);
+ clang::Decl *
+ GetClangDeclForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die);
- clang::DeclContext *GetClangDeclContextForDIE(const DWARFDIE &die);
+ clang::DeclContext *
+ GetClangDeclContextForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die);
- clang::DeclContext *GetClangDeclContextContainingDIE(const DWARFDIE &die,
- DWARFDIE *decl_ctx_die);
- lldb_private::OptionalClangModuleID GetOwningClangModule(const DWARFDIE &die);
+ clang::DeclContext *GetClangDeclContextContainingDIE(
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ lldb_private::plugin::dwarf::DWARFDIE *decl_ctx_die);
+ lldb_private::OptionalClangModuleID
+ GetOwningClangModule(const lldb_private::plugin::dwarf::DWARFDIE &die);
- bool CopyUniqueClassMethodTypes(const DWARFDIE &src_class_die,
- const DWARFDIE &dst_class_die,
- lldb_private::Type *class_type,
- std::vector<DWARFDIE> &failures);
+ bool CopyUniqueClassMethodTypes(
+ const lldb_private::plugin::dwarf::DWARFDIE &src_class_die,
+ const lldb_private::plugin::dwarf::DWARFDIE &dst_class_die,
+ lldb_private::Type *class_type,
+ std::vector<lldb_private::plugin::dwarf::DWARFDIE> &failures);
- clang::DeclContext *GetCachedClangDeclContextForDIE(const DWARFDIE &die);
+ clang::DeclContext *GetCachedClangDeclContextForDIE(
+ const lldb_private::plugin::dwarf::DWARFDIE &die);
- void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, const DWARFDIE &die);
+ void LinkDeclContextToDIE(clang::DeclContext *decl_ctx,
+ const lldb_private::plugin::dwarf::DWARFDIE &die);
- void LinkDeclToDIE(clang::Decl *decl, const DWARFDIE &die);
+ void LinkDeclToDIE(clang::Decl *decl,
+ const lldb_private::plugin::dwarf::DWARFDIE &die);
/// If \p type_sp is valid, calculate and set its symbol context scope, and
/// update the type list for its backing symbol file.
///
/// Returns \p type_sp.
- lldb::TypeSP
- UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc,
- const DWARFDIE &die, lldb::TypeSP type_sp);
+ lldb::TypeSP UpdateSymbolContextScopeForType(
+ const lldb_private::SymbolContext &sc,
+ const lldb_private::plugin::dwarf::DWARFDIE &die, lldb::TypeSP type_sp);
/// Follow Clang Module Skeleton CU references to find a type definition.
- lldb::TypeSP ParseTypeFromClangModule(const lldb_private::SymbolContext &sc,
- const DWARFDIE &die,
- lldb_private::Log *log);
+ lldb::TypeSP
+ ParseTypeFromClangModule(const lldb_private::SymbolContext &sc,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ lldb_private::Log *log);
// Return true if this type is a declaration to a type in an external
// module.
- lldb::ModuleSP GetModuleForType(const DWARFDIE &die);
-
- void PrepareContextToReceiveMembers(clang::DeclContext *decl_ctx,
- const DWARFDIE &decl_ctx_die,
- const DWARFDIE &die,
- const char *type_name_cstr);
+ lldb::ModuleSP
+ GetModuleForType(const lldb_private::plugin::dwarf::DWARFDIE &die);
static bool classof(const DWARFASTParser *Parser) {
return Parser->GetKind() == Kind::DWARFASTParserClang;
@@ -265,8 +274,10 @@ private:
/// Parsed form of all attributes that are relevant for parsing type members.
struct MemberAttributes {
- explicit MemberAttributes(const DWARFDIE &die, const DWARFDIE &parent_die,
- lldb::ModuleSP module_sp);
+ explicit MemberAttributes(
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
+ lldb::ModuleSP module_sp);
const char *name = nullptr;
/// Indicates how many bits into the word (according to the host endianness)
/// the low-order bit of the field starts. Can be negative.
@@ -313,12 +324,15 @@ private:
/// created property.
/// \param delayed_properties The list of delayed properties that the result
/// will be appended to.
- void ParseObjCProperty(const DWARFDIE &die, const DWARFDIE &parent_die,
- const lldb_private::CompilerType &class_clang_type,
- DelayedPropertyList &delayed_properties);
+ void
+ ParseObjCProperty(const lldb_private::plugin::dwarf::DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
+ const lldb_private::CompilerType &class_clang_type,
+ DelayedPropertyList &delayed_properties);
void
- ParseSingleMember(const DWARFDIE &die, const DWARFDIE &parent_die,
+ ParseSingleMember(const lldb_private::plugin::dwarf::DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
const lldb_private::CompilerType &class_clang_type,
lldb::AccessType default_accessibility,
lldb_private::ClangASTImporter::LayoutInfo &layout_info,
@@ -336,25 +350,31 @@ private:
/// \param[in] class_clang_type The parent RecordType of the static
/// member this function will create.
void CreateStaticMemberVariable(
- const DWARFDIE &die, const MemberAttributes &attrs,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ const MemberAttributes &attrs,
const lldb_private::CompilerType &class_clang_type);
- bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type,
+ bool CompleteRecordType(const lldb_private::plugin::dwarf::DWARFDIE &die,
+ lldb_private::Type *type,
lldb_private::CompilerType &clang_type);
- bool CompleteEnumType(const DWARFDIE &die, lldb_private::Type *type,
+ bool CompleteEnumType(const lldb_private::plugin::dwarf::DWARFDIE &die,
+ lldb_private::Type *type,
lldb_private::CompilerType &clang_type);
- lldb::TypeSP ParseTypeModifier(const lldb_private::SymbolContext &sc,
- const DWARFDIE &die,
- ParsedDWARFTypeAttributes &attrs);
+ lldb::TypeSP
+ ParseTypeModifier(const lldb_private::SymbolContext &sc,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ ParsedDWARFTypeAttributes &attrs);
lldb::TypeSP ParseEnum(const lldb_private::SymbolContext &sc,
- const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs);
- lldb::TypeSP ParseSubroutine(const DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ ParsedDWARFTypeAttributes &attrs);
+ lldb::TypeSP ParseSubroutine(const lldb_private::plugin::dwarf::DWARFDIE &die,
const ParsedDWARFTypeAttributes &attrs);
- lldb::TypeSP ParseArrayType(const DWARFDIE &die,
+ lldb::TypeSP ParseArrayType(const lldb_private::plugin::dwarf::DWARFDIE &die,
const ParsedDWARFTypeAttributes &attrs);
- lldb::TypeSP ParsePointerToMemberType(const DWARFDIE &die,
- const ParsedDWARFTypeAttributes &attrs);
+ lldb::TypeSP
+ ParsePointerToMemberType(const lldb_private::plugin::dwarf::DWARFDIE &die,
+ const ParsedDWARFTypeAttributes &attrs);
/// Parses a DW_TAG_inheritance DIE into a base/super class.
///
@@ -371,7 +391,8 @@ private:
/// \param layout_info The layout information that will be updated for C++
/// base classes with the base offset.
void ParseInheritance(
- const DWARFDIE &die, const DWARFDIE &parent_die,
+ const lldb_private::plugin::dwarf::DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
const lldb_private::CompilerType class_clang_type,
const lldb::AccessType default_accessibility,
const lldb::ModuleSP &module_sp,
@@ -388,7 +409,8 @@ private:
/// \param layout_info The layout information that will be updated for
// base classes with the base offset
void
- ParseRustVariantPart(DWARFDIE &die, const DWARFDIE &parent_die,
+ ParseRustVariantPart(lldb_private::plugin::dwarf::DWARFDIE &die,
+ const lldb_private::plugin::dwarf::DWARFDIE &parent_die,
lldb_private::CompilerType &class_clang_type,
const lldb::AccessType default_accesibility,
lldb_private::ClangASTImporter::LayoutInfo &layout_info);
@@ -398,9 +420,8 @@ private:
/// Some attributes are relevant for all kinds of types (declaration), while
/// others are only meaningful to a specific type (is_virtual)
struct ParsedDWARFTypeAttributes {
- typedef lldb_private::plugin::dwarf::DWARFDIE DWARFDIE;
-
- explicit ParsedDWARFTypeAttributes(const DWARFDIE &die);
+ explicit ParsedDWARFTypeAttributes(
+ const lldb_private::plugin::dwarf::DWARFDIE &die);
lldb::AccessType accessibility = lldb::eAccessNone;
bool is_artificial = false;
@@ -417,7 +438,7 @@ struct ParsedDWARFTypeAttributes {
const char *mangled_name = nullptr;
lldb_private::ConstString name;
lldb_private::Declaration decl;
- DWARFDIE object_pointer;
+ lldb_private::plugin::dwarf::DWARFDIE object_pointer;
lldb_private::plugin::dwarf::DWARFFormValue abstract_origin;
lldb_private::plugin::dwarf::DWARFFormValue containing_type;
lldb_private::plugin::dwarf::DWARFFormValue signature;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp
index 6330470b970e..90e42be7202d 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp
@@ -85,11 +85,6 @@ bool DebugNamesDWARFIndex::ProcessEntry(
DWARFDIE die = GetDIE(entry);
if (!die)
return true;
- // Clang erroneously emits index entries for declaration DIEs in case when the
- // definition is in a type unit (llvm.org/pr77696). Weed those out.
- if (die.IsStructUnionOrClass() &&
- die.GetAttributeValueAsUnsigned(DW_AT_declaration, 0))
- return true;
return callback(die);
}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index af3ba2cd5b39..a52a7d676737 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -481,13 +481,6 @@ static ConstString GetDWARFMachOSegmentName() {
return g_dwarf_section_name;
}
-llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef> &
-SymbolFileDWARF::GetForwardDeclCompilerTypeToDIE() {
- if (SymbolFileDWARFDebugMap *debug_map_symfile = GetDebugMapSymfile())
- return debug_map_symfile->GetForwardDeclCompilerTypeToDIE();
- return m_forward_decl_compiler_type_to_die;
-}
-
UniqueDWARFASTTypeMap &SymbolFileDWARF::GetUniqueDWARFASTTypeMap() {
SymbolFileDWARFDebugMap *debug_map_symfile = GetDebugMapSymfile();
if (debug_map_symfile)
@@ -1639,33 +1632,27 @@ bool SymbolFileDWARF::CompleteType(CompilerType &compiler_type) {
return true;
}
- // Once we start resolving this type, remove it from the forward
- // declaration map in case anyone's child members or other types require this
- // type to get resolved.
- DWARFDIE dwarf_die = GetDIE(die_it->second);
- GetForwardDeclCompilerTypeToDIE().erase(die_it);
- Type *type = nullptr;
- if (DWARFASTParser *dwarf_ast = GetDWARFParser(*dwarf_die.GetCU()))
- type = dwarf_ast->FindDefinitionTypeForDIE(dwarf_die);
- if (!type)
- return false;
-
- die_it = GetForwardDeclCompilerTypeToDIE().find(
- compiler_type_no_qualifiers.GetOpaqueQualType());
- if (die_it != GetForwardDeclCompilerTypeToDIE().end()) {
- dwarf_die = GetDIE(die_it->getSecond());
+ DWARFDIE dwarf_die = GetDIE(die_it->getSecond());
+ if (dwarf_die) {
+ // Once we start resolving this type, remove it from the forward
+ // declaration map in case anyone child members or other types require this
+ // type to get resolved. The type will get resolved when all of the calls
+ // to SymbolFileDWARF::ResolveClangOpaqueTypeDefinition are done.
GetForwardDeclCompilerTypeToDIE().erase(die_it);
- }
- if (Log *log = GetLog(DWARFLog::DebugInfo | DWARFLog::TypeCompletion))
- GetObjectFile()->GetModule()->LogMessageVerboseBacktrace(
- log, "{0:x8}: {1} ({2}) '{3}' resolving forward declaration...",
- dwarf_die.GetID(), DW_TAG_value_to_name(dwarf_die.Tag()),
- dwarf_die.Tag(), type->GetName().AsCString());
- assert(compiler_type);
- if (DWARFASTParser *dwarf_ast = GetDWARFParser(*dwarf_die.GetCU()))
- return dwarf_ast->CompleteTypeFromDWARF(dwarf_die, type, compiler_type);
- return true;
+ Type *type = GetDIEToType().lookup(dwarf_die.GetDIE());
+
+ Log *log = GetLog(DWARFLog::DebugInfo | DWARFLog::TypeCompletion);
+ if (log)
+ GetObjectFile()->GetModule()->LogMessageVerboseBacktrace(
+ log, "{0:x8}: {1} ({2}) '{3}' resolving forward declaration...",
+ dwarf_die.GetID(), DW_TAG_value_to_name(dwarf_die.Tag()),
+ dwarf_die.Tag(), type->GetName().AsCString());
+ assert(compiler_type);
+ if (DWARFASTParser *dwarf_ast = GetDWARFParser(*dwarf_die.GetCU()))
+ return dwarf_ast->CompleteTypeFromDWARF(dwarf_die, type, compiler_type);
+ }
+ return false;
}
Type *SymbolFileDWARF::ResolveType(const DWARFDIE &die,
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
index 35893f2072dd..7282c08c6857 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
@@ -335,8 +335,12 @@ public:
virtual DIEToTypePtr &GetDIEToType() { return m_die_to_type; }
- virtual llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef> &
- GetForwardDeclCompilerTypeToDIE();
+ typedef llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef>
+ CompilerTypeToDIE;
+
+ virtual CompilerTypeToDIE &GetForwardDeclCompilerTypeToDIE() {
+ return m_forward_decl_compiler_type_to_die;
+ }
typedef llvm::DenseMap<const DWARFDebugInfoEntry *, lldb::VariableSP>
DIEToVariableSP;
@@ -529,14 +533,9 @@ protected:
NameToOffsetMap m_function_scope_qualified_name_map;
std::unique_ptr<DWARFDebugRanges> m_ranges;
UniqueDWARFASTTypeMap m_unique_ast_type_map;
- // A map from DIE to lldb_private::Type. For record type, the key might be
- // either declaration DIE or definition DIE.
DIEToTypePtr m_die_to_type;
DIEToVariableSP m_die_to_variable_sp;
- // A map from CompilerType to the struct/class/union/enum DIE (might be a
- // declaration or a definition) that is used to construct it.
- llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef>
- m_forward_decl_compiler_type_to_die;
+ CompilerTypeToDIE m_forward_decl_compiler_type_to_die;
llvm::DenseMap<dw_offset_t, std::unique_ptr<SupportFileList>>
m_type_unit_support_files;
std::vector<uint32_t> m_lldb_cu_to_dwarf_unit;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h
index d7d571919bc7..de22dd676eef 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h
@@ -284,11 +284,6 @@ protected:
lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE(
const DWARFDIE &die, ConstString type_name, bool must_be_implementation);
- llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef> &
- GetForwardDeclCompilerTypeToDIE() {
- return m_forward_decl_compiler_type_to_die;
- }
-
UniqueDWARFASTTypeMap &GetUniqueDWARFASTTypeMap() {
return m_unique_ast_type_map;
}
@@ -326,10 +321,6 @@ protected:
std::vector<uint32_t> m_func_indexes; // Sorted by address
std::vector<uint32_t> m_glob_indexes;
std::map<std::pair<ConstString, llvm::sys::TimePoint<>>, OSOInfoSP> m_oso_map;
- // A map from CompilerType to the struct/class/union/enum DIE (might be a
- // declaration or a definition) that is used to construct it.
- llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef>
- m_forward_decl_compiler_type_to_die;
UniqueDWARFASTTypeMap m_unique_ast_type_map;
LazyBool m_supports_DW_AT_APPLE_objc_complete_type;
DebugMap m_debug_map;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp
index e4db39cabf6f..71c9997e4c82 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp
@@ -110,7 +110,7 @@ SymbolFileDWARF::DIEToVariableSP &SymbolFileDWARFDwo::GetDIEToVariable() {
return GetBaseSymbolFile().GetDIEToVariable();
}
-llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef> &
+SymbolFileDWARF::CompilerTypeToDIE &
SymbolFileDWARFDwo::GetForwardDeclCompilerTypeToDIE() {
return GetBaseSymbolFile().GetForwardDeclCompilerTypeToDIE();
}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h
index 2f0ac415e90d..1500540424b5 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h
@@ -72,8 +72,7 @@ protected:
DIEToVariableSP &GetDIEToVariable() override;
- llvm::DenseMap<lldb::opaque_compiler_type_t, DIERef> &
- GetForwardDeclCompilerTypeToDIE() override;
+ CompilerTypeToDIE &GetForwardDeclCompilerTypeToDIE() override;
UniqueDWARFASTTypeMap &GetUniqueDWARFASTTypeMap() override;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp
index 3d201e96f92c..223518f0ae82 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp
@@ -13,75 +13,66 @@
using namespace lldb_private::dwarf;
using namespace lldb_private::plugin::dwarf;
-static bool IsStructOrClassTag(llvm::dwarf::Tag Tag) {
- return Tag == llvm::dwarf::Tag::DW_TAG_class_type ||
- Tag == llvm::dwarf::Tag::DW_TAG_structure_type;
-}
-
-UniqueDWARFASTType *UniqueDWARFASTTypeList::Find(
- const DWARFDIE &die, const lldb_private::Declaration &decl,
- const int32_t byte_size, bool is_forward_declaration) {
- for (UniqueDWARFASTType &udt : m_collection) {
+bool UniqueDWARFASTTypeList::Find(const DWARFDIE &die,
+ const lldb_private::Declaration &decl,
+ const int32_t byte_size,
+ UniqueDWARFASTType &entry) const {
+ for (const UniqueDWARFASTType &udt : m_collection) {
// Make sure the tags match
- if (udt.m_die.Tag() == die.Tag() || (IsStructOrClassTag(udt.m_die.Tag()) &&
- IsStructOrClassTag(die.Tag()))) {
- // If they are not both definition DIEs or both declaration DIEs, then
- // don't check for byte size and declaration location, because declaration
- // DIEs usually don't have those info.
- bool matching_size_declaration =
- udt.m_is_forward_declaration != is_forward_declaration
- ? true
- : (udt.m_byte_size < 0 || byte_size < 0 ||
- udt.m_byte_size == byte_size) &&
- udt.m_declaration == decl;
- if (!matching_size_declaration)
- continue;
- // The type has the same name, and was defined on the same file and
- // line. Now verify all of the parent DIEs match.
- DWARFDIE parent_arg_die = die.GetParent();
- DWARFDIE parent_pos_die = udt.m_die.GetParent();
- bool match = true;
- bool done = false;
- while (!done && match && parent_arg_die && parent_pos_die) {
- const dw_tag_t parent_arg_tag = parent_arg_die.Tag();
- const dw_tag_t parent_pos_tag = parent_pos_die.Tag();
- if (parent_arg_tag == parent_pos_tag ||
- (IsStructOrClassTag(parent_arg_tag) &&
- IsStructOrClassTag(parent_pos_tag))) {
- switch (parent_arg_tag) {
- case DW_TAG_class_type:
- case DW_TAG_structure_type:
- case DW_TAG_union_type:
- case DW_TAG_namespace: {
- const char *parent_arg_die_name = parent_arg_die.GetName();
- if (parent_arg_die_name == nullptr) {
- // Anonymous (i.e. no-name) struct
- match = false;
- } else {
- const char *parent_pos_die_name = parent_pos_die.GetName();
- if (parent_pos_die_name == nullptr ||
- ((parent_arg_die_name != parent_pos_die_name) &&
- strcmp(parent_arg_die_name, parent_pos_die_name)))
- match = false;
+ if (udt.m_die.Tag() == die.Tag()) {
+ // Validate byte sizes of both types only if both are valid.
+ if (udt.m_byte_size < 0 || byte_size < 0 ||
+ udt.m_byte_size == byte_size) {
+ // Make sure the file and line match
+ if (udt.m_declaration == decl) {
+ // The type has the same name, and was defined on the same file and
+ // line. Now verify all of the parent DIEs match.
+ DWARFDIE parent_arg_die = die.GetParent();
+ DWARFDIE parent_pos_die = udt.m_die.GetParent();
+ bool match = true;
+ bool done = false;
+ while (!done && match && parent_arg_die && parent_pos_die) {
+ const dw_tag_t parent_arg_tag = parent_arg_die.Tag();
+ const dw_tag_t parent_pos_tag = parent_pos_die.Tag();
+ if (parent_arg_tag == parent_pos_tag) {
+ switch (parent_arg_tag) {
+ case DW_TAG_class_type:
+ case DW_TAG_structure_type:
+ case DW_TAG_union_type:
+ case DW_TAG_namespace: {
+ const char *parent_arg_die_name = parent_arg_die.GetName();
+ if (parent_arg_die_name ==
+ nullptr) // Anonymous (i.e. no-name) struct
+ {
+ match = false;
+ } else {
+ const char *parent_pos_die_name = parent_pos_die.GetName();
+ if (parent_pos_die_name == nullptr ||
+ ((parent_arg_die_name != parent_pos_die_name) &&
+ strcmp(parent_arg_die_name, parent_pos_die_name)))
+ match = false;
+ }
+ } break;
+
+ case DW_TAG_compile_unit:
+ case DW_TAG_partial_unit:
+ done = true;
+ break;
+ default:
+ break;
+ }
}
- } break;
+ parent_arg_die = parent_arg_die.GetParent();
+ parent_pos_die = parent_pos_die.GetParent();
+ }
- case DW_TAG_compile_unit:
- case DW_TAG_partial_unit:
- done = true;
- break;
- default:
- break;
+ if (match) {
+ entry = udt;
+ return true;
}
}
- parent_arg_die = parent_arg_die.GetParent();
- parent_pos_die = parent_pos_die.GetParent();
- }
-
- if (match) {
- return &udt;
}
}
}
- return nullptr;
+ return false;
}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h
index 29e5c02dcbe1..bf3cbae55e5c 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h
@@ -23,19 +23,31 @@ public:
// Constructors and Destructors
UniqueDWARFASTType() : m_type_sp(), m_die(), m_declaration() {}
+ UniqueDWARFASTType(lldb::TypeSP &type_sp, const DWARFDIE &die,
+ const Declaration &decl, int32_t byte_size)
+ : m_type_sp(type_sp), m_die(die), m_declaration(decl),
+ m_byte_size(byte_size) {}
+
UniqueDWARFASTType(const UniqueDWARFASTType &rhs)
: m_type_sp(rhs.m_type_sp), m_die(rhs.m_die),
- m_declaration(rhs.m_declaration), m_byte_size(rhs.m_byte_size),
- m_is_forward_declaration(rhs.m_is_forward_declaration) {}
+ m_declaration(rhs.m_declaration), m_byte_size(rhs.m_byte_size) {}
~UniqueDWARFASTType() = default;
+ UniqueDWARFASTType &operator=(const UniqueDWARFASTType &rhs) {
+ if (this != &rhs) {
+ m_type_sp = rhs.m_type_sp;
+ m_die = rhs.m_die;
+ m_declaration = rhs.m_declaration;
+ m_byte_size = rhs.m_byte_size;
+ }
+ return *this;
+ }
+
lldb::TypeSP m_type_sp;
DWARFDIE m_die;
Declaration m_declaration;
int32_t m_byte_size = -1;
- // True if the m_die is a forward declaration DIE.
- bool m_is_forward_declaration = true;
};
class UniqueDWARFASTTypeList {
@@ -50,9 +62,8 @@ public:
m_collection.push_back(entry);
}
- UniqueDWARFASTType *Find(const DWARFDIE &die, const Declaration &decl,
- const int32_t byte_size,
- bool is_forward_declaration);
+ bool Find(const DWARFDIE &die, const Declaration &decl,
+ const int32_t byte_size, UniqueDWARFASTType &entry) const;
protected:
typedef std::vector<UniqueDWARFASTType> collection;
@@ -69,15 +80,14 @@ public:
m_collection[name.GetCString()].Append(entry);
}
- UniqueDWARFASTType *Find(ConstString name, const DWARFDIE &die,
- const Declaration &decl, const int32_t byte_size,
- bool is_forward_declaration) {
+ bool Find(ConstString name, const DWARFDIE &die, const Declaration &decl,
+ const int32_t byte_size, UniqueDWARFASTType &entry) const {
const char *unique_name_cstr = name.GetCString();
- collection::iterator pos = m_collection.find(unique_name_cstr);
+ collection::const_iterator pos = m_collection.find(unique_name_cstr);
if (pos != m_collection.end()) {
- return pos->second.Find(die, decl, byte_size, is_forward_declaration);
+ return pos->second.Find(die, decl, byte_size, entry);
}
- return nullptr;
+ return false;
}
protected:
diff --git a/lldb/source/Target/Statistics.cpp b/lldb/source/Target/Statistics.cpp
index be0848573f81..2a5300012511 100644
--- a/lldb/source/Target/Statistics.cpp
+++ b/lldb/source/Target/Statistics.cpp
@@ -355,14 +355,14 @@ llvm::json::Value DebuggerStats::ReportStatistics(
}
global_stats.try_emplace("targets", std::move(json_targets));
+ ConstStringStats const_string_stats;
+ json::Object json_memory{
+ {"strings", const_string_stats.ToJSON()},
+ };
+ global_stats.try_emplace("memory", std::move(json_memory));
if (!summary_only) {
- ConstStringStats const_string_stats;
- json::Object json_memory{
- {"strings", const_string_stats.ToJSON()},
- };
json::Value cmd_stats = debugger.GetCommandInterpreter().GetStatistics();
global_stats.try_emplace("modules", std::move(json_modules));
- global_stats.try_emplace("memory", std::move(json_memory));
global_stats.try_emplace("commands", std::move(cmd_stats));
}
diff --git a/lldb/source/Target/ThreadPlanStepOverRange.cpp b/lldb/source/Target/ThreadPlanStepOverRange.cpp
index 84f282f1de52..3fe02e0bf4fa 100644
--- a/lldb/source/Target/ThreadPlanStepOverRange.cpp
+++ b/lldb/source/Target/ThreadPlanStepOverRange.cpp
@@ -355,7 +355,7 @@ bool ThreadPlanStepOverRange::DoPlanExplainsStop(Event *event_ptr) {
return_value = NextRangeBreakpointExplainsStop(stop_info_sp);
} else {
if (log)
- log->PutCString("ThreadPlanStepInRange got asked if it explains the "
+ log->PutCString("ThreadPlanStepOverRange got asked if it explains the "
"stop for some reason other than step.");
return_value = false;
}
diff --git a/lldb/test/API/functionalities/stats_api/TestStatisticsAPI.py b/lldb/test/API/functionalities/stats_api/TestStatisticsAPI.py
index 851097bdfecf..f06c9ae14bb7 100644
--- a/lldb/test/API/functionalities/stats_api/TestStatisticsAPI.py
+++ b/lldb/test/API/functionalities/stats_api/TestStatisticsAPI.py
@@ -85,14 +85,15 @@ class TestStatsAPI(TestBase):
stats_summary.GetAsJSON(stream_summary)
debug_stats_summary = json.loads(stream_summary.GetData())
self.assertNotIn("modules", debug_stats_summary)
- self.assertNotIn("memory", debug_stats_summary)
self.assertNotIn("commands", debug_stats_summary)
# Summary values should be the same as in full statistics.
- # Except the parse time on Mac OS X is not deterministic.
+ # The exceptions to this are:
+ # - The parse time on Mac OS X is not deterministic.
+ # - Memory usage may grow over time due to the use of ConstString.
for key, value in debug_stats_summary.items():
self.assertIn(key, debug_stats)
- if key != "targets" and not key.endswith("Time"):
+ if key != "memory" and key != "targets" and not key.endswith("Time"):
self.assertEqual(debug_stats[key], value)
def test_command_stats_api(self):
diff --git a/lldb/test/API/tools/lldb-server/TestPtyServer.py b/lldb/test/API/tools/lldb-server/TestPtyServer.py
index 4bfcf70bfa01..345f68f6d87d 100644
--- a/lldb/test/API/tools/lldb-server/TestPtyServer.py
+++ b/lldb/test/API/tools/lldb-server/TestPtyServer.py
@@ -7,6 +7,7 @@ from lldbgdbserverutils import *
import xml.etree.ElementTree as ET
+@skipIfRemote
@skipIf(hostoslist=["windows"])
class PtyServerTestCase(gdbremote_testcase.GdbRemoteTestCaseBase):
def setUp(self):
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test
index c57cefdaf32d..9c0510c34cca 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test
@@ -1,9 +1,9 @@
-# REQUIRES: python, native && target-aarch64 && system-darwin
+# REQUIRES: python, native && system-darwin
# RUN: mkdir -p %t.dir
# RUN: yaml2obj %S/Inputs/application_specific_info/asi.yaml > %t.dir/asi
# RUN: %lldb -o 'command script import lldb.macosx.crashlog' \
-# RUN: -o 'crashlog -a -i -t %t.dir/asi %S/Inputs/application_specific_info/asi.txt' \
+# RUN: -o 'crashlog -i -t %t.dir/asi %S/Inputs/application_specific_info/asi.txt' \
# RUN: -o "thread list" -o "bt all" 2>&1 | FileCheck %s
# CHECK: "crashlog" {{.*}} commands have been installed, use the "--help" options on these commands
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_invalid_target.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_invalid_target.test
index abd1e7c3da53..eb1f5f456a2d 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_invalid_target.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_invalid_target.test
@@ -1,4 +1,4 @@
-# REQUIRES: python, native && target-aarch64 && system-darwin
+# REQUIRES: python, native && system-darwin
# RUN: %lldb -o 'command script import lldb.macosx.crashlog' \
# RUN: -o 'crashlog -V' \
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_json.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_json.test
index fccd71ce31f7..684be2846f78 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_json.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_json.test
@@ -1,4 +1,4 @@
-# REQUIRES: python, native && target-aarch64 && system-darwin
+# REQUIRES: python, native && system-darwin
# RUN: mkdir -p %t.dir
# RUN: yaml2obj %S/Inputs/interactive_crashlog/multithread-test.yaml > %t.dir/multithread-test
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_legacy.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_legacy.test
index 6e2826e88aed..271a4c2aa90f 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_legacy.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive_crashlog_legacy.test
@@ -1,4 +1,4 @@
-# REQUIRES: python, native && target-aarch64 && system-darwin
+# REQUIRES: python, native && system-darwin
# RUN: mkdir -p %t.dir
# RUN: yaml2obj %S/Inputs/interactive_crashlog/multithread-test.yaml > %t.dir/multithread-test
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/last_exception_backtrace_crashlog.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/last_exception_backtrace_crashlog.test
index c2f61963ed0c..a17b7ac18a62 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/last_exception_backtrace_crashlog.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/last_exception_backtrace_crashlog.test
@@ -1,9 +1,9 @@
-# REQUIRES: python, native && target-aarch64 && system-darwin
+# REQUIRES: python, native && system-darwin
# RUN: mkdir -p %t.dir
# RUN: yaml2obj %S/Inputs/application_specific_info/asi.yaml > %t.dir/asi
# RUN: %lldb -o 'command script import lldb.macosx.crashlog' \
-# RUN: -o 'crashlog -a -i -t %t.dir/asi %S/Inputs/application_specific_info/leb.txt' \
+# RUN: -o 'crashlog -i -t %t.dir/asi %S/Inputs/application_specific_info/leb.txt' \
# RUN: -o "thread list" -o "bt all" 2>&1 | FileCheck %s
# CHECK: "crashlog" {{.*}} commands have been installed, use the "--help" options on these commands
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/lit.local.cfg b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/lit.local.cfg
index 3da9265b3553..b72b29419893 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/lit.local.cfg
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/lit.local.cfg
@@ -5,3 +5,6 @@ if 'lldb-repro' in config.available_features:
config.unsupported = True
config.environment["LLDB_APPLE_DSYMFORUUID_EXECUTABLE"] = ""
+
+# Temporary parallel image loading deadlock workaround
+config.environment["NO_PARALLEL_IMG_LOADING"] = ""
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test
index 81e06868eaee..64cd0904371a 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test
@@ -1,4 +1,4 @@
-# REQUIRES: python, native && target-aarch64 && system-darwin
+# REQUIRES: python, native && system-darwin
# RUN: mkdir -p %t.dir
# RUN: yaml2obj %S/Inputs/interactive_crashlog/multithread-test.yaml > %t.dir/multithread-test
diff --git a/lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test b/lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test
deleted file mode 100644
index d253981b498c..000000000000
--- a/lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test
+++ /dev/null
@@ -1,36 +0,0 @@
-# Test definition DIE searching is delayed until complete type is required.
-
-# UNSUPPORTED: system-windows
-
-# RUN: split-file %s %t
-# RUN: %clangxx_host %t/main.cpp %t/t1_def.cpp -gdwarf -o %t.out
-# RUN: %lldb -b %t.out -s %t/lldb.cmd | FileCheck %s
-
-# CHECK: (lldb) p v1
-# CHECK: DWARFASTParserClang::ParseTypeFromDWARF{{.*}}DW_TAG_structure_type (DW_TAG_structure_type) name = 't2<t1>'
-# CHECK: DWARFASTParserClang::ParseTypeFromDWARF{{.*}}DW_TAG_structure_type (DW_TAG_structure_type) name = 't1'
-# CHECK: DW_TAG_structure_type (DW_TAG_structure_type) 't2<t1>' resolving forward declaration...
-# CHECK: (t2<t1>) {}
-# CHECK: (lldb) p v2
-# CHECK: DWARFASTParserClang::ParseTypeFromDWARF{{.*}}DW_TAG_structure_type (DW_TAG_structure_type) name = 't1'
-# CHECK: DW_TAG_structure_type (DW_TAG_structure_type) 't1' resolving forward declaration...
-
-#--- lldb.cmd
-log enable dwarf comp
-p v1
-p v2
-
-#--- main.cpp
-template<typename T>
-struct t2 {
-};
-struct t1;
-t2<t1> v1; // this CU doesn't have definition DIE for t1, but only declaration DIE for it.
-int main() {
-}
-
-#--- t1_def.cpp
-struct t1 { // this CU contains definition DIE for t1.
- int x;
-};
-t1 v2;
diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/simple-template-names-context.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/simple-template-names-context.cpp
new file mode 100644
index 000000000000..a8a4d3b8fbd5
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/x86/simple-template-names-context.cpp
@@ -0,0 +1,44 @@
+// Test that we can correctly resolve forward declared types when they only
+// differ in the template arguments of the surrounding context. The reproducer
+// is sensitive to the order of declarations, so we test in both directions.
+
+// REQUIRES: lld
+
+// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-a.o -g -gsimple-template-names -DFILE_A
+// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-b.o -g -gsimple-template-names -DFILE_B
+// RUN: ld.lld %t-a.o %t-b.o -o %t
+// RUN: %lldb %t -o "target variable --ptr-depth 1 --show-types both_a both_b" -o exit | FileCheck %s
+
+// CHECK: (lldb) target variable
+// CHECK-NEXT: (ReferencesBoth<'A'>) both_a = {
+// CHECK-NEXT: (Outer<'A'>::Inner *) a = 0x{{[0-9A-Fa-f]*}} {}
+// CHECK-NEXT: (Outer<'A'>::Inner *) b = 0x{{[0-9A-Fa-f]*}} {}
+// CHECK-NEXT: }
+// CHECK-NEXT: (ReferencesBoth<'B'>) both_b = {
+// CHECK-NEXT: (Outer<'A'>::Inner *) a = 0x{{[0-9A-Fa-f]*}} {}
+// CHECK-NEXT: (Outer<'B'>::Inner *) b = 0x{{[0-9A-Fa-f]*}} {}
+// CHECK-NEXT: }
+
+template<char C>
+struct Outer {
+ struct Inner {};
+};
+
+template<char C>
+struct ReferencesBoth {
+ Outer<'A'>::Inner *a;
+ Outer<'B'>::Inner *b;
+};
+
+#ifdef FILE_A
+Outer<'A'>::Inner inner_a;
+extern Outer<'B'>::Inner inner_b;
+
+ReferencesBoth<'A'> both_a{&inner_a, &inner_b};
+
+#else
+extern Outer<'A'>::Inner inner_a;
+Outer<'B'>::Inner inner_b;
+
+ReferencesBoth<'B'> both_b{&inner_a, &inner_b};
+#endif
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index bb6751038fc9..aa50ce329d1d 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -504,6 +504,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
work-item Add product
IDs names.
+ ``gfx1152`` ``amdgcn`` APU - cumode - Architected *TBA*
+ - wavefrontsize64 flat
+ scratch .. TODO::
+ - Packed
+ work-item Add product
+ IDs names.
+
``gfx1200`` ``amdgcn`` dGPU - cumode - Architected *TBA*
- wavefrontsize64 flat
scratch .. TODO::
@@ -591,11 +598,13 @@ Generic processor code objects are versioned. See :ref:`amdgpu-generic-processor
- ``gfx1102`` - Packed hazards specific to some targets
- ``gfx1103`` work-item within this family.
- ``gfx1150`` IDs
- - ``gfx1151`` Not all VGPRs can be used on:
+ - ``gfx1151``
+ - ``gfx1152`` Not all VGPRs can be used on:
- ``gfx1100``
- ``gfx1101``
- ``gfx1151``
+ - ``gfx1152``
SALU floating point instructions
and single-use VGPR hint
@@ -604,12 +613,14 @@ Generic processor code objects are versioned. See :ref:`amdgpu-generic-processor
- ``gfx1150``
- ``gfx1151``
+ - ``gfx1152``
SGPRs are not supported for src1
in dpp instructions for:
- ``gfx1150``
- ``gfx1151``
+ - ``gfx1152``
``gfx12-generic`` ``amdgcn`` - ``gfx1200`` - wavefrontsize64 - Architected No restrictions.
@@ -1979,7 +1990,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10-1-generic``
``EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC`` 0x053 ``gfx10-3-generic``
``EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC`` 0x054 ``gfx11-generic``
- *reserved* 0x055 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1152`` 0x055 ``gfx1152``.
*reserved* 0x056 Reserved.
*reserved* 0x057 Reserved.
*reserved* 0x058 Reserved.
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 393b97efa547..c4a85620fc3e 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -116,6 +116,10 @@ Changes to the Hexagon Backend
Changes to the LoongArch Backend
--------------------------------
+* i32 is now a native type in the datalayout string. This enables
+ LoopStrengthReduce for loops with i32 induction variables, among other
+ optimizations.
+
Changes to the MIPS Backend
---------------------------
diff --git a/llvm/include/llvm/Analysis/CodeMetrics.h b/llvm/include/llvm/Analysis/CodeMetrics.h
index a9431bca1125..d09018daf954 100644
--- a/llvm/include/llvm/Analysis/CodeMetrics.h
+++ b/llvm/include/llvm/Analysis/CodeMetrics.h
@@ -20,12 +20,15 @@
namespace llvm {
class AssumptionCache;
class BasicBlock;
+class Instruction;
class Loop;
class Function;
template <class T> class SmallPtrSetImpl;
class TargetTransformInfo;
class Value;
+enum struct ConvergenceKind { None, Controlled, ExtendedLoop, Uncontrolled };
+
/// Utility to calculate the size and a few similar metrics for a set
/// of basic blocks.
struct CodeMetrics {
@@ -42,8 +45,8 @@ struct CodeMetrics {
/// one or more 'noduplicate' instructions.
bool notDuplicatable = false;
- /// True if this function contains a call to a convergent function.
- bool convergent = false;
+ /// The kind of convergence specified in this function.
+ ConvergenceKind Convergence = ConvergenceKind::None;
/// True if this function calls alloca (in the C sense).
bool usesDynamicAlloca = false;
@@ -77,7 +80,7 @@ struct CodeMetrics {
/// Add information about a block to the current state.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI,
const SmallPtrSetImpl<const Value *> &EphValues,
- bool PrepareForLTO = false);
+ bool PrepareForLTO = false, const Loop *L = nullptr);
/// Collect a loop's ephemeral values (those used only by an assume
/// or similar intrinsics in the loop).
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 52084630560c..4f06a7e889f9 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -649,6 +649,9 @@ int getIntLoopAttribute(const Loop *TheLoop, StringRef Name, int Default = 0);
std::optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
StringRef Name);
+/// Find the convergence heart of the loop.
+CallBase *getLoopConvergenceHeart(const Loop *TheLoop);
+
/// Look for the loop attribute that requires progress within the loop.
/// Note: Most consumers probably want "isMustProgress" which checks
/// the containing function attribute too.
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index 69821c22dcd6..db6780b70ca5 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -147,6 +147,7 @@ enum Kind {
kw_aarch64_vector_pcs,
kw_aarch64_sve_vector_pcs,
kw_aarch64_sme_preservemost_from_x0,
+ kw_aarch64_sme_preservemost_from_x1,
kw_aarch64_sme_preservemost_from_x2,
kw_msp430_intrcc,
kw_avr_intrcc,
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 7364d619bc1b..dfba18014991 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -795,7 +795,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X55 = 0x055,
+ EF_AMDGPU_MACH_AMDGCN_GFX1152 = 0x055,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X56 = 0x056,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X57 = 0x057,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X58 = 0x058,
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 12e5b31e5817..43659564d5ac 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -871,6 +871,14 @@ public:
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI,
BuildFnTy &MatchInfo);
+ bool matchAddOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo);
+
+ bool matchMulOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo);
+
+ bool matchSubOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo);
+
+ bool matchShlOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 2b3efc3b609f..227372563771 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -14,10 +14,12 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
#define LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
-#include "llvm/IR/Instructions.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"
namespace llvm {
@@ -856,6 +858,43 @@ public:
};
};
+/// Represents a vscale.
+class GVScale : public GenericMachineInstr {
+public:
+ APInt getSrc() const { return getOperand(1).getCImm()->getValue(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_VSCALE;
+ };
+};
+
+/// Represents an integer subtraction.
+class GSub : public GIntBinOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_SUB;
+ };
+};
+
+/// Represents an integer multiplication.
+class GMul : public GIntBinOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_MUL;
+ };
+};
+
+/// Represents a shift left.
+class GShl : public GenericMachineInstr {
+public:
+ Register getSrcReg() const { return getOperand(1).getReg(); }
+ Register getShiftReg() const { return getOperand(2).getReg(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_SHL;
+ };
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 9f8e846cac45..6e7292abeddb 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -18,7 +18,6 @@
#define LLVM_CODEGEN_MACHINEFUNCTION_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallVector.h"
@@ -34,6 +33,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Recycler.h"
#include "llvm/Target/TargetOptions.h"
+#include <bitset>
#include <cassert>
#include <cstdint>
#include <memory>
@@ -217,22 +217,21 @@ public:
}
MachineFunctionProperties &reset(const MachineFunctionProperties &MFP) {
- Properties.reset(MFP.Properties);
+ Properties &= ~MFP.Properties;
return *this;
}
// Returns true if all properties set in V (i.e. required by a pass) are set
// in this.
bool verifyRequiredProperties(const MachineFunctionProperties &V) const {
- return !V.Properties.test(Properties);
+ return (Properties | ~V.Properties).all();
}
/// Print the MachineFunctionProperties in human-readable form.
void print(raw_ostream &OS) const;
private:
- BitVector Properties =
- BitVector(static_cast<unsigned>(Property::LastProperty)+1);
+ std::bitset<static_cast<unsigned>(Property::LastProperty) + 1> Properties;
};
struct SEHHandler {
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 7970441c8371..071a27a79506 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -330,9 +330,7 @@ template <typename... Preds> struct And {
template <typename Pred, typename... Preds>
struct And<Pred, Preds...> : And<Preds...> {
Pred P;
- And(Pred &&p, Preds &&...preds)
- : And<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) {
- }
+ And(const Pred &p, const Preds &...preds) : And<Preds...>(preds...), P(p) {}
template <typename MatchContext>
bool match(const MatchContext &Ctx, SDValue N) {
@@ -349,8 +347,7 @@ template <typename... Preds> struct Or {
template <typename Pred, typename... Preds>
struct Or<Pred, Preds...> : Or<Preds...> {
Pred P;
- Or(Pred &&p, Preds &&...preds)
- : Or<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) {}
+ Or(const Pred &p, const Preds &...preds) : Or<Preds...>(preds...), P(p) {}
template <typename MatchContext>
bool match(const MatchContext &Ctx, SDValue N) {
@@ -376,16 +373,16 @@ template <typename Pred> inline Not<Pred> m_Unless(const Pred &P) {
return Not{P};
}
-template <typename... Preds> And<Preds...> m_AllOf(Preds &&...preds) {
- return And<Preds...>(std::forward<Preds>(preds)...);
+template <typename... Preds> And<Preds...> m_AllOf(const Preds &...preds) {
+ return And<Preds...>(preds...);
}
-template <typename... Preds> Or<Preds...> m_AnyOf(Preds &&...preds) {
- return Or<Preds...>(std::forward<Preds>(preds)...);
+template <typename... Preds> Or<Preds...> m_AnyOf(const Preds &...preds) {
+ return Or<Preds...>(preds...);
}
-template <typename... Preds> auto m_NoneOf(Preds &&...preds) {
- return m_Unless(m_AnyOf(std::forward<Preds>(preds)...));
+template <typename... Preds> auto m_NoneOf(const Preds &...preds) {
+ return m_Unless(m_AnyOf(preds...));
}
// === Generic node matching ===
@@ -402,10 +399,8 @@ struct Operands_match<OpIdx, OpndPred, OpndPreds...>
: Operands_match<OpIdx + 1, OpndPreds...> {
OpndPred P;
- Operands_match(OpndPred &&p, OpndPreds &&...preds)
- : Operands_match<OpIdx + 1, OpndPreds...>(
- std::forward<OpndPreds>(preds)...),
- P(std::forward<OpndPred>(p)) {}
+ Operands_match(const OpndPred &p, const OpndPreds &...preds)
+ : Operands_match<OpIdx + 1, OpndPreds...>(preds...), P(p) {}
template <typename MatchContext>
bool match(const MatchContext &Ctx, SDValue N) {
@@ -419,9 +414,8 @@ struct Operands_match<OpIdx, OpndPred, OpndPreds...>
};
template <typename... OpndPreds>
-auto m_Node(unsigned Opcode, OpndPreds &&...preds) {
- return m_AllOf(m_Opc(Opcode), Operands_match<0, OpndPreds...>(
- std::forward<OpndPreds>(preds)...));
+auto m_Node(unsigned Opcode, const OpndPreds &...preds) {
+ return m_AllOf(m_Opc(Opcode), Operands_match<0, OpndPreds...>(preds...));
}
/// Provide number of operands that are not chain or glue, as well as the first
@@ -647,10 +641,9 @@ template <typename Opnd> inline UnaryOpc_match<Opnd> m_ZExt(const Opnd &Op) {
return UnaryOpc_match<Opnd>(ISD::ZERO_EXTEND, Op);
}
-template <typename Opnd> inline auto m_SExt(Opnd &&Op) {
- return m_AnyOf(
- UnaryOpc_match<Opnd>(ISD::SIGN_EXTEND, Op),
- m_Node(ISD::SIGN_EXTEND_INREG, std::forward<Opnd>(Op), m_Value()));
+template <typename Opnd> inline auto m_SExt(const Opnd &Op) {
+ return m_AnyOf(UnaryOpc_match<Opnd>(ISD::SIGN_EXTEND, Op),
+ m_Node(ISD::SIGN_EXTEND_INREG, Op, m_Value()));
}
template <typename Opnd> inline UnaryOpc_match<Opnd> m_AnyExt(const Opnd &Op) {
@@ -663,30 +656,28 @@ template <typename Opnd> inline UnaryOpc_match<Opnd> m_Trunc(const Opnd &Op) {
/// Match a zext or identity
/// Allows to peek through optional extensions
-template <typename Opnd> inline auto m_ZExtOrSelf(Opnd &&Op) {
- return m_AnyOf(m_ZExt(std::forward<Opnd>(Op)), std::forward<Opnd>(Op));
+template <typename Opnd> inline auto m_ZExtOrSelf(const Opnd &Op) {
+ return m_AnyOf(m_ZExt(Op), Op);
}
/// Match a sext or identity
/// Allows to peek through optional extensions
-template <typename Opnd> inline auto m_SExtOrSelf(Opnd &&Op) {
- return m_AnyOf(m_SExt(std::forward<Opnd>(Op)), std::forward<Opnd>(Op));
+template <typename Opnd> inline auto m_SExtOrSelf(const Opnd &Op) {
+ return m_AnyOf(m_SExt(Op), Op);
}
/// Match a aext or identity
/// Allows to peek through optional extensions
template <typename Opnd>
-inline Or<UnaryOpc_match<Opnd>, Opnd> m_AExtOrSelf(Opnd &&Op) {
- return Or<UnaryOpc_match<Opnd>, Opnd>(m_AnyExt(std::forward<Opnd>(Op)),
- std::forward<Opnd>(Op));
+inline Or<UnaryOpc_match<Opnd>, Opnd> m_AExtOrSelf(const Opnd &Op) {
+ return Or<UnaryOpc_match<Opnd>, Opnd>(m_AnyExt(Op), Op);
}
/// Match a trunc or identity
/// Allows to peek through optional truncations
template <typename Opnd>
-inline Or<UnaryOpc_match<Opnd>, Opnd> m_TruncOrSelf(Opnd &&Op) {
- return Or<UnaryOpc_match<Opnd>, Opnd>(m_Trunc(std::forward<Opnd>(Op)),
- std::forward<Opnd>(Op));
+inline Or<UnaryOpc_match<Opnd>, Opnd> m_TruncOrSelf(const Opnd &Op) {
+ return Or<UnaryOpc_match<Opnd>, Opnd>(m_Trunc(Op), Op);
}
// === Constants ===
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 48cb0cdf851c..7b0e5e7d9504 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1893,7 +1893,8 @@ public:
const SDNode *N2);
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
- ArrayRef<SDValue> Ops);
+ ArrayRef<SDValue> Ops,
+ SDNodeFlags Flags = SDNodeFlags());
/// Fold floating-point operations when all operands are constants and/or
/// undefined.
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index d1912b1c4c0f..aa7a32e86ad8 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -96,7 +96,7 @@ class Value;
namespace Sched {
-enum Preference {
+enum Preference : uint8_t {
None, // No preference
Source, // Follow source order.
RegPressure, // Scheduling for lowest register pressure.
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
index a05d1a4d5878..55e32028e3ed 100644
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -267,6 +267,9 @@ namespace CallingConv {
/// Calling convention used for RISC-V V-extension.
RISCV_VectorCall = 110,
+ /// Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15.
+ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1 = 111,
+
/// The highest possible ID. Must be some 2^k - 1.
MaxID = 1023
};
diff --git a/llvm/include/llvm/IR/IRBuilderFolder.h b/llvm/include/llvm/IR/IRBuilderFolder.h
index 3c42eb2cf2a5..921001c8a5d5 100644
--- a/llvm/include/llvm/IR/IRBuilderFolder.h
+++ b/llvm/include/llvm/IR/IRBuilderFolder.h
@@ -15,6 +15,7 @@
#define LLVM_IR_IRBUILDERFOLDER_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/IR/GEPNoWrapFlags.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 9dd1bb455a71..ad649b53761a 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1588,6 +1588,14 @@ public:
static CallBase *removeOperandBundle(CallBase *CB, uint32_t ID,
BasicBlock::iterator InsertPt);
+ /// Return the convergence control token for this call, if it exists.
+ Value *getConvergenceControlToken() const {
+ if (auto Bundle = getOperandBundle(llvm::LLVMContext::OB_convergencectrl)) {
+ return Bundle->Inputs[0].get();
+ }
+ return nullptr;
+ }
+
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Call ||
I->getOpcode() == Instruction::Invoke ||
@@ -2120,6 +2128,15 @@ public:
return Attrs.getParamStackAlignment(ArgNo);
}
+ /// Extract the byref type for a call or parameter.
+ Type *getParamByRefType(unsigned ArgNo) const {
+ if (auto *Ty = Attrs.getParamByRefType(ArgNo))
+ return Ty;
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().getParamByRefType(ArgNo);
+ return nullptr;
+ }
+
/// Extract the byval type for a call or parameter.
Type *getParamByValType(unsigned ArgNo) const {
if (auto *Ty = Attrs.getParamByValType(ArgNo))
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index fcd3a1025ac1..9010e1a1c896 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1799,17 +1799,14 @@ public:
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
- // Returns the convergence intrinsic referenced by |I|'s convergencectrl
- // attribute if any.
- static IntrinsicInst *getParentConvergenceToken(Instruction *I) {
- auto *CI = dyn_cast<llvm::CallInst>(I);
- if (!CI)
- return nullptr;
-
- auto Bundle = CI->getOperandBundle(llvm::LLVMContext::OB_convergencectrl);
- assert(Bundle->Inputs.size() == 1 &&
- Bundle->Inputs[0]->getType()->isTokenTy());
- return dyn_cast<llvm::IntrinsicInst>(Bundle->Inputs[0].get());
+ bool isAnchor() {
+ return getIntrinsicID() == Intrinsic::experimental_convergence_anchor;
+ }
+ bool isEntry() {
+ return getIntrinsicID() == Intrinsic::experimental_convergence_entry;
+ }
+ bool isLoop() {
+ return getIntrinsicID() == Intrinsic::experimental_convergence_loop;
}
};
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index c4c1825bbf09..8803ef5a90e6 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -106,6 +106,7 @@ void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
+void initializeExpandVariadicsPass(PassRegistry &);
void initializeExpandVectorPredicationPass(PassRegistry &);
void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeFEntryInserterPass(PassRegistry&);
diff --git a/llvm/include/llvm/MC/MCSymbolWasm.h b/llvm/include/llvm/MC/MCSymbolWasm.h
index 0ce95c72a73f..0c2b97a59423 100644
--- a/llvm/include/llvm/MC/MCSymbolWasm.h
+++ b/llvm/include/llvm/MC/MCSymbolWasm.h
@@ -114,9 +114,11 @@ public:
return isTable() && hasTableType() &&
getTableType().ElemType == wasm::ValType::FUNCREF;
}
- void setFunctionTable() {
+ void setFunctionTable(bool is64) {
setType(wasm::WASM_SYMBOL_TYPE_TABLE);
- setTableType(wasm::ValType::FUNCREF);
+ uint8_t flags =
+ is64 ? wasm::WASM_LIMITS_FLAG_IS_64 : wasm::WASM_LIMITS_FLAG_NONE;
+ setTableType(wasm::ValType::FUNCREF, flags);
}
void setUsedInGOT() const { IsUsedInGOT = true; }
@@ -140,10 +142,11 @@ public:
return *TableType;
}
void setTableType(wasm::WasmTableType TT) { TableType = TT; }
- void setTableType(wasm::ValType VT) {
+ void setTableType(wasm::ValType VT,
+ uint8_t flags = wasm::WASM_LIMITS_FLAG_NONE) {
// Declare a table with element type VT and no limits (min size 0, no max
// size).
- wasm::WasmLimits Limits = {wasm::WASM_LIMITS_FLAG_NONE, 0, 0};
+ wasm::WasmLimits Limits = {flags, 0, 0};
setTableType({VT, Limits});
}
};
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 406144d9db1e..6b0fa6cd6541 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -199,7 +199,7 @@ struct Frame {
GlobalValue::GUID Function;
// The symbol name for the function. Only populated in the Frame by the reader
// if requested during initialization. This field should not be serialized.
- std::optional<std::string> SymbolName;
+ std::unique_ptr<std::string> SymbolName;
// The source line offset of the call from the beginning of parent function.
uint32_t LineOffset;
// The source column number of the call to help distinguish multiple calls
@@ -210,7 +210,9 @@ struct Frame {
Frame(const Frame &Other) {
Function = Other.Function;
- SymbolName = Other.SymbolName;
+ SymbolName = Other.SymbolName
+ ? std::make_unique<std::string>(*Other.SymbolName)
+ : nullptr;
LineOffset = Other.LineOffset;
Column = Other.Column;
IsInlineFrame = Other.IsInlineFrame;
@@ -228,7 +230,9 @@ struct Frame {
Frame &operator=(const Frame &Other) {
Function = Other.Function;
- SymbolName = Other.SymbolName;
+ SymbolName = Other.SymbolName
+ ? std::make_unique<std::string>(*Other.SymbolName)
+ : nullptr;
LineOffset = Other.LineOffset;
Column = Other.Column;
IsInlineFrame = Other.IsInlineFrame;
@@ -237,10 +241,10 @@ struct Frame {
bool operator!=(const Frame &Other) const { return !operator==(Other); }
- bool hasSymbolName() const { return SymbolName.has_value(); }
+ bool hasSymbolName() const { return !!SymbolName; }
StringRef getSymbolName() const {
- assert(SymbolName.has_value());
+ assert(hasSymbolName());
return *SymbolName;
}
@@ -364,7 +368,7 @@ struct IndexedAllocationInfo {
// be used for temporary in-memory instances.
struct AllocationInfo {
// Same as IndexedAllocationInfo::CallStack with the frame contents inline.
- llvm::SmallVector<Frame> CallStack;
+ std::vector<Frame> CallStack;
// Same as IndexedAllocationInfo::Info;
PortableMemInfoBlock Info;
@@ -446,8 +450,7 @@ struct IndexedMemProfRecord {
// Convert IndexedMemProfRecord to MemProfRecord. Callback is used to
// translate CallStackId to call stacks with frames inline.
MemProfRecord toMemProfRecord(
- llvm::function_ref<llvm::SmallVector<Frame>(const CallStackId)> Callback)
- const;
+ llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const;
// Returns the GUID for the function name after canonicalization. For
// memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
@@ -462,7 +465,7 @@ struct MemProfRecord {
// Same as IndexedMemProfRecord::AllocSites with frame contents inline.
llvm::SmallVector<AllocationInfo> AllocSites;
// Same as IndexedMemProfRecord::CallSites with frame contents inline.
- llvm::SmallVector<llvm::SmallVector<Frame>> CallSites;
+ llvm::SmallVector<std::vector<Frame>> CallSites;
MemProfRecord() = default;
MemProfRecord(
@@ -472,7 +475,7 @@ struct MemProfRecord {
AllocSites.emplace_back(IndexedAI, IdToFrameCallback);
}
for (const ArrayRef<FrameId> Site : Record.CallSites) {
- llvm::SmallVector<Frame> Frames;
+ std::vector<Frame> Frames;
for (const FrameId Id : Site) {
Frames.push_back(IdToFrameCallback(Id));
}
@@ -490,7 +493,7 @@ struct MemProfRecord {
if (!CallSites.empty()) {
OS << " CallSites:\n";
- for (const llvm::SmallVector<Frame> &Frames : CallSites) {
+ for (const std::vector<Frame> &Frames : CallSites) {
for (const Frame &F : Frames) {
OS << " -\n";
F.printYAML(OS);
@@ -844,8 +847,8 @@ template <typename MapTy> struct CallStackIdConverter {
CallStackIdConverter(const CallStackIdConverter &) = delete;
CallStackIdConverter &operator=(const CallStackIdConverter &) = delete;
- llvm::SmallVector<Frame> operator()(CallStackId CSId) {
- llvm::SmallVector<Frame> Frames;
+ std::vector<Frame> operator()(CallStackId CSId) {
+ std::vector<Frame> Frames;
auto CSIter = Map.find(CSId);
if (CSIter == Map.end()) {
LastUnmappedId = CSId;
@@ -886,8 +889,8 @@ struct LinearCallStackIdConverter {
std::function<Frame(LinearFrameId)> FrameIdToFrame)
: CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {}
- llvm::SmallVector<Frame> operator()(LinearCallStackId LinearCSId) {
- llvm::SmallVector<Frame> Frames;
+ std::vector<Frame> operator()(LinearCallStackId LinearCSId) {
+ std::vector<Frame> Frames;
const unsigned char *Ptr =
CallStackBase +
@@ -920,6 +923,113 @@ struct IndexedMemProfData {
llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> CallStackData;
};
+// Construct a radix tree of call stacks.
+//
+// A set of call stacks might look like:
+//
+// CallStackId 1: f1 -> f2 -> f3
+// CallStackId 2: f1 -> f2 -> f4 -> f5
+// CallStackId 3: f1 -> f2 -> f4 -> f6
+// CallStackId 4: f7 -> f8 -> f9
+//
+// where each fn refers to a stack frame.
+//
+// Since we expect a lot of common prefixes, we can compress the call stacks
+// into a radix tree like:
+//
+// CallStackId 1: f1 -> f2 -> f3
+// |
+// CallStackId 2: +---> f4 -> f5
+// |
+// CallStackId 3: +---> f6
+//
+// CallStackId 4: f7 -> f8 -> f9
+//
+// Now, we are interested in retrieving call stacks for a given CallStackId, so
+// we just need a pointer from a given call stack to its parent. For example,
+// CallStackId 2 would point to CallStackId 1 as a parent.
+//
+// We serialize the radix tree above into a single array along with the length
+// of each call stack and pointers to the parent call stacks.
+//
+// Index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
+// Array: L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1
+// ^ ^ ^ ^
+// | | | |
+// CallStackId 4: 0 --+ | | |
+// CallStackId 3: 4 --------------+ | |
+// CallStackId 2: 7 -----------------------+ |
+// CallStackId 1: 11 -----------------------------------+
+//
+// - LN indicates the length of a call stack, encoded as ordinary integer N.
+//
+// - JN indicates a pointer to the parent, encoded as -N.
+//
+// The radix tree allows us to reconstruct call stacks in the leaf-to-root
+// order as we scan the array from left to right while following pointers to
+// parents along the way.
+//
+// For example, if we are decoding CallStackId 2, we start a forward traversal
+// at Index 7, noting the call stack length of 4 and obtaining f5 and f4. When
+// we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3,
+// picking up f2 and f1. We are done after collecting 4 frames as indicated at
+// the beginning of the traversal.
+//
+// On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
+// the radix tree array, so we do not explicitly encode mappings like:
+// "CallStackId 1 -> 11".
+class CallStackRadixTreeBuilder {
+ // The radix tree array.
+ std::vector<LinearFrameId> RadixArray;
+
+ // Mapping from CallStackIds to indexes into RadixArray.
+ llvm::DenseMap<CallStackId, LinearCallStackId> CallStackPos;
+
+ // In build, we partition a given call stack into two parts -- the prefix
+ // that's common with the previously encoded call stack and the frames beyond
+ // the common prefix -- the unique portion. Then we want to find out where
+ // the common prefix is stored in RadixArray so that we can link the unique
+ // portion to the common prefix. Indexes, declared below, helps with our
+ // needs. Intuitively, Indexes tells us where each of the previously encoded
+ // call stack is stored in RadixArray. More formally, Indexes satisfies:
+ //
+ // RadixArray[Indexes[I]] == Prev[I]
+ //
+  // for every I, where Prev is the call stack in the root-to-leaf order
+ // previously encoded by build. (Note that Prev, as passed to
+ // encodeCallStack, is in the leaf-to-root order.)
+ //
+ // For example, if the call stack being encoded shares 5 frames at the root of
+ // the call stack with the previously encoded call stack,
+ // RadixArray[Indexes[0]] is the root frame of the common prefix.
+ // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
+ std::vector<LinearCallStackId> Indexes;
+
+ using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameId>>;
+
+ // Encode a call stack into RadixArray. Return the starting index within
+ // RadixArray.
+ LinearCallStackId encodeCallStack(
+ const llvm::SmallVector<FrameId> *CallStack,
+ const llvm::SmallVector<FrameId> *Prev,
+ const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes);
+
+public:
+ CallStackRadixTreeBuilder() = default;
+
+ // Build a radix tree array.
+ void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
+ &&MemProfCallStackData,
+ const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes);
+
+ const std::vector<LinearFrameId> &getRadixArray() const { return RadixArray; }
+
+ const llvm::DenseMap<CallStackId, LinearCallStackId> &
+ getCallStackPos() const {
+ return CallStackPos;
+ }
+};
+
// Verify that each CallStackId is computed with hashCallStack. This function
// is intended to help transition from CallStack to CSId in
// IndexedAllocationInfo.
diff --git a/llvm/include/llvm/Support/Endian.h b/llvm/include/llvm/Support/Endian.h
index 30e0852b972c..5831fe66a1f7 100644
--- a/llvm/include/llvm/Support/Endian.h
+++ b/llvm/include/llvm/Support/Endian.h
@@ -72,7 +72,8 @@ template <typename value_type, endianness endian, std::size_t alignment>
/// Read a value of a particular endianness from a buffer, and increment the
/// buffer past that value.
-template <typename value_type, std::size_t alignment, typename CharT>
+template <typename value_type, std::size_t alignment = unaligned,
+ typename CharT>
[[nodiscard]] inline value_type readNext(const CharT *&memory,
endianness endian) {
value_type ret = read<value_type, alignment>(memory, endian);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 1ea2652871ab..bd43b9589903 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1605,6 +1605,37 @@ def insert_vector_elt_oob : GICombineRule<
[{ return Helper.matchInsertVectorElementOOB(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+def add_of_vscale : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_VSCALE $left, $imm1),
+ (G_VSCALE $right, $imm2),
+ (G_ADD $root, $left, $right, (MIFlags NoSWrap)),
+ [{ return Helper.matchAddOfVScale(${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def mul_of_vscale : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_VSCALE $left, $scale),
+ (G_CONSTANT $x, $imm1),
+ (G_MUL $root, $left, $x, (MIFlags NoSWrap)),
+ [{ return Helper.matchMulOfVScale(${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def shl_of_vscale : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_VSCALE $left, $imm),
+ (G_CONSTANT $x, $imm1),
+ (G_SHL $root, $left, $x, (MIFlags NoSWrap)),
+ [{ return Helper.matchShlOfVScale(${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def sub_of_vscale : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_VSCALE $right, $imm),
+ (G_SUB $root, $x, $right, (MIFlags NoSWrap)),
+ [{ return Helper.matchSubOfVScale(${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
// match_extract_of_element and insert_vector_elt_oob must be the first!
def vector_ops_combines: GICombineGroup<[
match_extract_of_element_undef_vector,
@@ -1637,7 +1668,11 @@ extract_vector_element_build_vector_trunc6,
extract_vector_element_build_vector_trunc7,
extract_vector_element_build_vector_trunc8,
extract_vector_element_shuffle_vector,
-insert_vector_element_extract_vector_element
+insert_vector_element_extract_vector_element,
+add_of_vscale,
+mul_of_vscale,
+shl_of_vscale,
+sub_of_vscale,
]>;
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 5025ab2491de..afe6789ceb25 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -577,6 +577,11 @@ inline constexpr CpuInfo CpuInfos[] = {
AArch64::AEK_SHA2, AArch64::AEK_AES,
AArch64::AEK_MTE, AArch64::AEK_SB,
AArch64::AEK_SSBS, AArch64::AEK_CSSC})},
+ {"oryon-1", ARMV8_6A,
+ (AArch64::ExtensionBitset({AArch64::AEK_AES, AArch64::AEK_CRYPTO,
+ AArch64::AEK_RAND, AArch64::AEK_SM4,
+ AArch64::AEK_SHA3, AArch64::AEK_SHA2,
+ AArch64::AEK_PROFILE}))},
};
// Name alias.
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index 8fc6fa37cb9a..e03d8f6eebfc 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -105,6 +105,7 @@ enum GPUKind : uint32_t {
GK_GFX1103 = 93,
GK_GFX1150 = 94,
GK_GFX1151 = 95,
+ GK_GFX1152 = 96,
GK_GFX1200 = 100,
GK_GFX1201 = 101,
diff --git a/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
new file mode 100644
index 000000000000..4c5a1b61e2d4
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
@@ -0,0 +1,40 @@
+//===- ExpandVariadics.h - expand variadic functions ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+#define LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+class ModulePass;
+class OptimizationLevel;
+
+enum class ExpandVariadicsMode {
+ Unspecified, // Use the implementation defaults
+ Disable, // Disable the pass entirely
+ Optimize, // Optimise without changing ABI
+ Lowering, // Change variadic calling convention
+};
+
+class ExpandVariadicsPass : public PassInfoMixin<ExpandVariadicsPass> {
+ const ExpandVariadicsMode Mode;
+
+public:
+ // Operates under passed mode unless overridden on commandline
+ ExpandVariadicsPass(ExpandVariadicsMode Mode);
+
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+ModulePass *createExpandVariadicsPass(ExpandVariadicsMode);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index bd804dc11266..797c082333a7 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -16,6 +16,7 @@
#define LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/InstructionCost.h"
@@ -73,6 +74,7 @@ struct UnrollLoopOptions {
bool AllowExpensiveTripCount;
bool UnrollRemainder;
bool ForgetAllSCEV;
+ const Instruction *Heart = nullptr;
};
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
@@ -128,14 +130,15 @@ class UnrollCostEstimator {
public:
unsigned NumInlineCandidates;
- bool Convergent;
+ ConvergenceKind Convergence;
+ bool ConvergenceAllowsRuntime;
UnrollCostEstimator(const Loop *L, const TargetTransformInfo &TTI,
const SmallPtrSetImpl<const Value *> &EphValues,
unsigned BEInsns);
/// Whether it is legal to unroll this loop.
- bool canUnroll() const { return LoopSize.isValid() && !NotDuplicatable; }
+ bool canUnroll() const;
uint64_t getRolledLoopSize() const { return *LoopSize.getValue(); }
diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp
index 2637e2f97dbb..ea67b526423b 100644
--- a/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/llvm/lib/Analysis/CodeMetrics.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
@@ -111,11 +112,24 @@ void CodeMetrics::collectEphemeralValues(
completeEphemeralValues(Visited, Worklist, EphValues);
}
+static bool extendsConvergenceOutsideLoop(const Instruction &I, const Loop *L) {
+ if (!L)
+ return false;
+ if (!isa<ConvergenceControlInst>(I))
+ return false;
+ for (const auto *U : I.users()) {
+ if (!L->contains(cast<Instruction>(U)))
+ return true;
+ }
+ return false;
+}
+
/// Fill in the current structure with information gleaned from the specified
/// block.
void CodeMetrics::analyzeBasicBlock(
const BasicBlock *BB, const TargetTransformInfo &TTI,
- const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO) {
+ const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO,
+ const Loop *L) {
++NumBlocks;
InstructionCost NumInstsBeforeThisBB = NumInsts;
for (const Instruction &I : *BB) {
@@ -163,19 +177,38 @@ void CodeMetrics::analyzeBasicBlock(
if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
++NumVectorInsts;
- if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
+ if (I.getType()->isTokenTy() && !isa<ConvergenceControlInst>(I) &&
+ I.isUsedOutsideOfBlock(BB)) {
+ LLVM_DEBUG(dbgs() << I
+ << "\n Cannot duplicate a token value used outside "
+ "the current block (except convergence control).\n");
notDuplicatable = true;
-
- if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
- if (CI->cannotDuplicate())
- notDuplicatable = true;
- if (CI->isConvergent())
- convergent = true;
}
- if (const InvokeInst *InvI = dyn_cast<InvokeInst>(&I))
- if (InvI->cannotDuplicate())
+ if (const CallBase *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->cannotDuplicate())
notDuplicatable = true;
+ // Compute a meet over the visited blocks for the following partial order:
+ //
+ // None -> { Controlled, ExtendedLoop, Uncontrolled}
+ // Controlled -> ExtendedLoop
+ if (Convergence <= ConvergenceKind::Controlled && CB->isConvergent()) {
+ if (isa<ConvergenceControlInst>(CB) ||
+ CB->getConvergenceControlToken()) {
+ assert(Convergence != ConvergenceKind::Uncontrolled);
+ LLVM_DEBUG(dbgs() << "Found controlled convergence:\n" << I << "\n");
+ if (extendsConvergenceOutsideLoop(I, L))
+ Convergence = ConvergenceKind::ExtendedLoop;
+ else {
+ assert(Convergence != ConvergenceKind::ExtendedLoop);
+ Convergence = ConvergenceKind::Controlled;
+ }
+ } else {
+ assert(Convergence == ConvergenceKind::None);
+ Convergence = ConvergenceKind::Uncontrolled;
+ }
+ }
+ }
NumInsts += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
}
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 369ab087ffc0..c34c4974382e 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -1105,6 +1105,26 @@ int llvm::getIntLoopAttribute(const Loop *TheLoop, StringRef Name,
return getOptionalIntLoopAttribute(TheLoop, Name).value_or(Default);
}
+CallBase *llvm::getLoopConvergenceHeart(const Loop *TheLoop) {
+ BasicBlock *H = TheLoop->getHeader();
+ for (Instruction &II : *H) {
+ if (auto *CB = dyn_cast<CallBase>(&II)) {
+ if (!CB->isConvergent())
+ continue;
+ // This is the heart if it uses a token defined outside the loop. The
+ // verifier has already checked that only the loop intrinsic can use such
+ // a token.
+ if (auto *Token = CB->getConvergenceControlToken()) {
+ auto *TokenDef = cast<Instruction>(Token);
+ if (!TheLoop->contains(TokenDef->getParent()))
+ return CB;
+ }
+ return nullptr;
+ }
+ }
+ return nullptr;
+}
+
bool llvm::isFinite(const Loop *L) {
return L->getHeader()->getParent()->willReturn();
}
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 08138a5e2f2d..782c28c94483 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -7296,10 +7296,13 @@ static bool isGuaranteedNotToBeUndefOrPoison(
isa<ConstantPointerNull>(C) || isa<Function>(C))
return true;
- if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C))
- return (!includesUndef(Kind) ? !C->containsPoisonElement()
- : !C->containsUndefOrPoisonElement()) &&
- !C->containsConstantExpression();
+ if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C)) {
+ if (includesUndef(Kind) && C->containsUndefElement())
+ return false;
+ if (includesPoison(Kind) && C->containsPoisonElement())
+ return false;
+ return !C->containsConstantExpression();
+ }
}
// Strip cast operations from a pointer value.
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index d3ab306904da..7d7fe19568e8 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -604,6 +604,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(aarch64_vector_pcs);
KEYWORD(aarch64_sve_vector_pcs);
KEYWORD(aarch64_sme_preservemost_from_x0);
+ KEYWORD(aarch64_sme_preservemost_from_x1);
KEYWORD(aarch64_sme_preservemost_from_x2);
KEYWORD(msp430_intrcc);
KEYWORD(avr_intrcc);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 07c8aa23fc5e..f0fde9ae4df5 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -2153,6 +2153,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'aarch64_vector_pcs'
/// ::= 'aarch64_sve_vector_pcs'
/// ::= 'aarch64_sme_preservemost_from_x0'
+/// ::= 'aarch64_sme_preservemost_from_x1'
/// ::= 'aarch64_sme_preservemost_from_x2'
/// ::= 'msp430_intrcc'
/// ::= 'avr_intrcc'
@@ -2212,6 +2213,9 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
case lltok::kw_aarch64_sme_preservemost_from_x0:
CC = CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0;
break;
+ case lltok::kw_aarch64_sme_preservemost_from_x1:
+ CC = CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1;
+ break;
case lltok::kw_aarch64_sme_preservemost_from_x2:
CC = CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2;
break;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
index b4765fb280f9..66b1c5f8ca82 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements CombinerHelper for G_EXTRACT_VECTOR_ELT.
+// This file implements CombinerHelper for G_EXTRACT_VECTOR_ELT,
+// G_INSERT_VECTOR_ELT, and G_VSCALE
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
@@ -400,3 +401,86 @@ bool CombinerHelper::matchInsertVectorElementOOB(MachineInstr &MI,
return false;
}
+
+bool CombinerHelper::matchAddOfVScale(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ GAdd *Add = cast<GAdd>(MRI.getVRegDef(MO.getReg()));
+ GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Add->getLHSReg()));
+ GVScale *RHSVScale = cast<GVScale>(MRI.getVRegDef(Add->getRHSReg()));
+
+ Register Dst = Add->getReg(0);
+
+ if (!MRI.hasOneNonDBGUse(LHSVScale->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(RHSVScale->getReg(0)))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildVScale(Dst, LHSVScale->getSrc() + RHSVScale->getSrc());
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchMulOfVScale(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ GMul *Mul = cast<GMul>(MRI.getVRegDef(MO.getReg()));
+ GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Mul->getLHSReg()));
+
+ std::optional<APInt> MaybeRHS = getIConstantVRegVal(Mul->getRHSReg(), MRI);
+ if (!MaybeRHS)
+ return false;
+
+ Register Dst = MO.getReg();
+
+ if (!MRI.hasOneNonDBGUse(LHSVScale->getReg(0)))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildVScale(Dst, LHSVScale->getSrc() * *MaybeRHS);
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchSubOfVScale(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ GSub *Sub = cast<GSub>(MRI.getVRegDef(MO.getReg()));
+ GVScale *RHSVScale = cast<GVScale>(MRI.getVRegDef(Sub->getRHSReg()));
+
+ Register Dst = MO.getReg();
+ LLT DstTy = MRI.getType(Dst);
+
+ if (!MRI.hasOneNonDBGUse(RHSVScale->getReg(0)) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, DstTy}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto VScale = B.buildVScale(DstTy, -RHSVScale->getSrc());
+ B.buildAdd(Dst, Sub->getLHSReg(), VScale, Sub->getFlags());
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchShlOfVScale(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ GShl *Shl = cast<GShl>(MRI.getVRegDef(MO.getReg()));
+ GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Shl->getSrcReg()));
+
+ std::optional<APInt> MaybeRHS = getIConstantVRegVal(Shl->getShiftReg(), MRI);
+ if (!MaybeRHS)
+ return false;
+
+ Register Dst = MO.getReg();
+ LLT DstTy = MRI.getType(Dst);
+
+ if (!MRI.hasOneNonDBGUse(LHSVScale->getReg(0)) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_VSCALE, DstTy}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildVScale(Dst, LHSVScale->getSrc().shl(*MaybeRHS));
+ };
+
+ return true;
+}
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 3397bd0a6060..a808a541103f 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1339,14 +1339,13 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (SrcIdx && DstIdx)
return false;
- [[maybe_unused]] const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg();
+ const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg();
const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
if (!DefMI->isImplicitDef()) {
if (DstReg.isPhysical()) {
Register NewDstReg = DstReg;
- unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
- DefMI->getOperand(0).getSubReg());
+ unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), DefSubIdx);
if (NewDstIdx)
NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9a5359015439..02cd125eeff0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4041,17 +4041,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
}
- // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
- if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
- if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
- SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
- SDValue S0 = N1.getOperand(0);
- if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
- if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
- if (C->getAPIntValue() == (BitWidth - 1))
- return DAG.getNode(ISD::ABS, DL, VT, S0);
- }
- }
+ // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
+ if (hasOperation(ISD::ABS, VT) &&
+ sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
+ sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
+ return DAG.getNode(ISD::ABS, DL, VT, A);
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 042684a434fd..8cdb4ba0ade6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -992,11 +992,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
break;
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
- if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG)) {
- Results.push_back(Expanded);
- return;
- }
- break;
+ Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG));
+ return;
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 09cdec8adb27..e176cf2cc2a6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6333,7 +6333,8 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
- EVT VT, ArrayRef<SDValue> Ops) {
+ EVT VT, ArrayRef<SDValue> Ops,
+ SDNodeFlags Flags) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
@@ -6690,7 +6691,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
// Constant fold the scalar operands.
- SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps);
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
// Legalize the (integer) scalar constant if necessary.
if (LegalSVT != SVT)
@@ -7261,7 +7262,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
// Perform trivial constant folding.
- if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}))
+ if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags))
return SV;
// Canonicalize an UNDEF to the RHS, even over a constant.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 64d6fd458c62..2f3626f1c820 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1684,7 +1684,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
if (!FragmentExpr)
continue;
SDDbgValue *SDV = DAG.getVRegDbgValue(
- Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, SDNodeOrder);
+ Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, Order);
DAG.AddDbgValue(SDV, false);
Offset += RegisterSize;
}
@@ -1699,11 +1699,10 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
}
// We have created a SDDbgOperand for each Value in Values.
- // Should use Order instead of SDNodeOrder?
assert(!LocationOps.empty());
- SDDbgValue *SDV = DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
- /*IsIndirect=*/false, DbgLoc,
- SDNodeOrder, IsVariadic);
+ SDDbgValue *SDV =
+ DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
+ /*IsIndirect=*/false, DbgLoc, Order, IsVariadic);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
return true;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f856c8a51984..e1c1a6b09b11 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8427,10 +8427,6 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
bool IsMax = Opc == ISD::FMAXIMUM;
SDNodeFlags Flags = N->getFlags();
- if (VT.isVector() &&
- isOperationLegalOrCustomOrPromote(Opc, VT.getScalarType()))
- return SDValue();
-
// First, implement comparison not propagating NaN. If no native fmin or fmax
// available, use plain select with setcc instead.
SDValue MinMax;
@@ -8447,6 +8443,9 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
} else if (isOperationLegalOrCustom(CompOpc, VT)) {
MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
} else {
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return DAG.UnrollVectorOp(N);
+
// NaN (if exists) will be propagated later, so orderness doesn't matter.
SDValue Compare =
DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
@@ -9159,6 +9158,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMAX, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
+ Op = DAG.getFreeze(Op);
return DAG.getNode(ISD::SMAX, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
@@ -9175,8 +9175,8 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
// 0 - abs(x) -> smin(x, sub(0,x))
if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMIN, VT)) {
- Op = DAG.getFreeze(Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
+ Op = DAG.getFreeze(Op);
return DAG.getNode(ISD::SMIN, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 2c4b45255d05..92213e19c9d9 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3961,7 +3961,7 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
// Loop is not unrollable if the loop contains certain instructions.
- if (!UCE.canUnroll() || UCE.Convergent) {
+ if (!UCE.canUnroll()) {
LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
return 1;
}
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 7a5f18fe2cbd..0bf8be9ac55f 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -326,6 +326,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0:
Out << "aarch64_sme_preservemost_from_x0";
break;
+ case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1:
+ Out << "aarch64_sme_preservemost_from_x1";
+ break;
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2:
Out << "aarch64_sme_preservemost_from_x2";
break;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index a7ed2de6e8a5..2f4b8351e747 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5368,8 +5368,8 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
return DL.empty() ? std::string("G1") : (DL + "-G1").str();
}
- if (T.isRISCV64()) {
- // Make i32 a native type for 64-bit RISC-V.
+ if (T.isLoongArch64() || T.isRISCV64()) {
+ // Make i32 a native type for 64-bit LoongArch and RISC-V.
auto I = DL.find("-n64-");
if (I != StringRef::npos)
return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 985f9351f4a3..788e92f94b26 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -877,7 +877,7 @@ void WasmObjectWriter::writeImportSection(ArrayRef<wasm::WasmImport> Imports,
break;
case wasm::WASM_EXTERNAL_TABLE:
W->OS << char(Import.Table.ElemType);
- encodeULEB128(0, W->OS); // flags
+ encodeULEB128(Import.Table.Limits.Flags, W->OS);
encodeULEB128(NumElements, W->OS); // initial
break;
case wasm::WASM_EXTERNAL_TAG:
@@ -1022,7 +1022,8 @@ void WasmObjectWriter::writeElemSection(
encodeULEB128(TableNumber, W->OS); // the table number
// init expr for starting offset
- W->OS << char(wasm::WASM_OPCODE_I32_CONST);
+ W->OS << char(is64Bit() ? wasm::WASM_OPCODE_I64_CONST
+ : wasm::WASM_OPCODE_I32_CONST);
encodeSLEB128(InitialTableOffset, W->OS);
W->OS << char(wasm::WASM_OPCODE_END);
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 2b6bdbf24afa..cbc55a145e0e 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -586,6 +586,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx1150";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151:
return "gfx1151";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152:
+ return "gfx1152";
// AMDGCN GFX12.
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200:
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 8e2a9481c922..0fee299994bc 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -611,6 +611,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1103, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1150, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1151, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1152, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH);
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 316d05bf1dc3..8dd060d0151a 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -139,6 +139,7 @@
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 50682ca4970f..dad97146a9f6 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -59,6 +59,7 @@ MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
MODULE_PASS("dxil-upgrade", DXILUpgradePass())
MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
+MODULE_PASS("expand-variadics", ExpandVariadicsPass(ExpandVariadicsMode::Disable))
MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
MODULE_PASS("function-import", FunctionImportPass())
MODULE_PASS("globalopt", GlobalOptPass())
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index ad63086430bf..6a8f25d4d3bf 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1176,16 +1176,6 @@ void ValueProfData::deserializeTo(InstrProfRecord &Record,
}
}
-template <class T>
-static T swapToHostOrder(const unsigned char *&D, llvm::endianness Orig) {
- using namespace support;
-
- if (Orig == llvm::endianness::little)
- return endian::readNext<T, llvm::endianness::little>(D);
- else
- return endian::readNext<T, llvm::endianness::big>(D);
-}
-
static std::unique_ptr<ValueProfData> allocValueProfData(uint32_t TotalSize) {
return std::unique_ptr<ValueProfData>(new (::operator new(TotalSize))
ValueProfData());
@@ -1224,7 +1214,8 @@ ValueProfData::getValueProfData(const unsigned char *D,
return make_error<InstrProfError>(instrprof_error::truncated);
const unsigned char *Header = D;
- uint32_t TotalSize = swapToHostOrder<uint32_t>(Header, Endianness);
+ uint32_t TotalSize = endian::readNext<uint32_t>(Header, Endianness);
+
if (D + TotalSize > BufferEnd)
return make_error<InstrProfError>(instrprof_error::too_large);
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 1d9860e0ea7e..aecac2416952 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -338,8 +338,7 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
}
MemProfRecord IndexedMemProfRecord::toMemProfRecord(
- llvm::function_ref<llvm::SmallVector<Frame>(const CallStackId)> Callback)
- const {
+ llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const {
MemProfRecord Record;
Record.AllocSites.reserve(AllocSites.size());
@@ -410,6 +409,165 @@ CallStackId hashCallStack(ArrayRef<FrameId> CS) {
return CSId;
}
+// Encode a call stack into RadixArray. Return the starting index within
+// RadixArray. For each call stack we encode, we emit two or three components
+// into RadixArray. If a given call stack doesn't have a common prefix relative
+// to the previous one, we emit:
+//
+// - the frames in the given call stack in the root-to-leaf order
+//
+// - the length of the given call stack
+//
+// If a given call stack has a non-empty common prefix relative to the previous
+// one, we emit:
+//
+// - the relative location of the common prefix, encoded as a negative number.
+//
+// - a portion of the given call stack that's beyond the common prefix
+//
+// - the length of the given call stack, including the length of the common
+// prefix.
+//
+// The resulting RadixArray requires a somewhat unintuitive backward traversal
+// to reconstruct a call stack -- read the call stack length and scan backward
+// while collecting frames in the leaf to root order. build, the caller of this
+// function, reverses RadixArray in place so that we can reconstruct a call
+// stack as if we were deserializing an array in a typical way -- the call stack
+// length followed by the frames in the leaf-to-root order except that we need
+// to handle pointers to parents along the way.
+//
+// To quickly determine the location of the common prefix within RadixArray,
+// Indexes caches the indexes of the previous call stack's frames within
+// RadixArray.
+LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
+ const llvm::SmallVector<FrameId> *CallStack,
+ const llvm::SmallVector<FrameId> *Prev,
+ const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes) {
+ // Compute the length of the common root prefix between Prev and CallStack.
+ uint32_t CommonLen = 0;
+ if (Prev) {
+ auto Pos = std::mismatch(Prev->rbegin(), Prev->rend(), CallStack->rbegin(),
+ CallStack->rend());
+ CommonLen = std::distance(CallStack->rbegin(), Pos.second);
+ }
+
+ // Drop the portion beyond CommonLen.
+ assert(CommonLen <= Indexes.size());
+ Indexes.resize(CommonLen);
+
+ // Append a pointer to the parent.
+ if (CommonLen) {
+ uint32_t CurrentIndex = RadixArray.size();
+ uint32_t ParentIndex = Indexes.back();
+ // The offset to the parent must be negative because we are pointing to an
+ // element we've already added to RadixArray.
+ assert(ParentIndex < CurrentIndex);
+ RadixArray.push_back(ParentIndex - CurrentIndex);
+ }
+
+ // Copy the part of the call stack beyond the common prefix to RadixArray.
+ assert(CommonLen <= CallStack->size());
+ for (FrameId F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) {
+ // Remember the index of F in RadixArray.
+ Indexes.push_back(RadixArray.size());
+ RadixArray.push_back(MemProfFrameIndexes.find(F)->second);
+ }
+ assert(CallStack->size() == Indexes.size());
+
+ // End with the call stack length.
+ RadixArray.push_back(CallStack->size());
+
+ // Return the index within RadixArray where we can start reconstructing a
+ // given call stack from.
+ return RadixArray.size() - 1;
+}
+
+void CallStackRadixTreeBuilder::build(
+ llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
+ &&MemProfCallStackData,
+ const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes) {
+ // Take the vector portion of MemProfCallStackData. The vector is exactly
+ // what we need to sort. Also, we no longer need its lookup capability.
+ llvm::SmallVector<CSIdPair, 0> CallStacks = MemProfCallStackData.takeVector();
+
+ // Return early if we have no work to do.
+ if (CallStacks.empty()) {
+ RadixArray.clear();
+ CallStackPos.clear();
+ return;
+ }
+
+ // Sort the list of call stacks in the dictionary order to maximize the length
+ // of the common prefix between two adjacent call stacks.
+ llvm::sort(CallStacks, [&](const CSIdPair &L, const CSIdPair &R) {
+ // Call stacks are stored from leaf to root. Perform comparisons from the
+ // root.
+ return std::lexicographical_compare(
+ L.second.rbegin(), L.second.rend(), R.second.rbegin(), R.second.rend(),
+ [&](FrameId F1, FrameId F2) { return F1 < F2; });
+ });
+
+ // Reserve some reasonable amount of storage.
+ RadixArray.clear();
+ RadixArray.reserve(CallStacks.size() * 8);
+
+ // Indexes will grow as long as the longest call stack.
+ Indexes.clear();
+ Indexes.reserve(512);
+
+ // CallStackPos will grow to exactly CallStacks.size() entries.
+ CallStackPos.clear();
+ CallStackPos.reserve(CallStacks.size());
+
+ // Compute the radix array. We encode one call stack at a time, computing the
+ // longest prefix that's shared with the previous call stack we encode. For
+ // each call stack we encode, we remember a mapping from CallStackId to its
+ // position within RadixArray.
+ //
+ // As an optimization, we encode from the last call stack in CallStacks to
+ // reduce the number of times we follow pointers to the parents. Consider the
+ // list of call stacks that has been sorted in the dictionary order:
+ //
+ // Call Stack 1: F1
+ // Call Stack 2: F1 -> F2
+ // Call Stack 3: F1 -> F2 -> F3
+ //
+ // If we traversed CallStacks in the forward order, we would end up with a
+ // radix tree like:
+ //
+ // Call Stack 1: F1
+ // |
+ // Call Stack 2: +---> F2
+ // |
+ // Call Stack 3: +---> F3
+ //
+ // Notice that each call stack jumps to the previous one. However, if we
+ // traverse CallStacks in the reverse order, then Call Stack 3 has the
+ // complete call stack encoded without any pointers. Call Stack 1 and 2 point
+ // to appropriate prefixes of Call Stack 3.
+ const llvm::SmallVector<FrameId> *Prev = nullptr;
+ for (const auto &[CSId, CallStack] : llvm::reverse(CallStacks)) {
+ LinearCallStackId Pos =
+ encodeCallStack(&CallStack, Prev, MemProfFrameIndexes);
+ CallStackPos.insert({CSId, Pos});
+ Prev = &CallStack;
+ }
+
+ // "RadixArray.size() - 1" below is problematic if RadixArray is empty.
+ assert(!RadixArray.empty());
+
+ // Reverse the radix array in place. We do so mostly for intuitive
+ // deserialization where we would read the length field and then the call
+ // stack frames proper just like any other array deserialization, except
+ // that we have occasional jumps to take advantage of prefixes.
+ for (size_t I = 0, J = RadixArray.size() - 1; I < J; ++I, --J)
+ std::swap(RadixArray[I], RadixArray[J]);
+
+ // "Reverse" the indexes stored in CallStackPos.
+ for (auto &[K, V] : CallStackPos)
+ V = RadixArray.size() - 1 - V;
+}
+
void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) {
for (const auto &AS : Record.AllocSites) {
assert(AS.CSId == hashCallStack(AS.CallStack));
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index fc3be716087e..693897f874a2 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -690,7 +690,7 @@ Error RawMemProfReader::readNextRecord(
return F;
auto Iter = this->GuidToSymbolName.find(F.Function);
assert(Iter != this->GuidToSymbolName.end());
- F.SymbolName = Iter->getSecond();
+ F.SymbolName = std::make_unique<std::string>(Iter->getSecond());
return F;
};
return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index fcefdef992be..7360901f2962 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -867,21 +867,16 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
// Any intermediate directories we create should be accessible by
// the owner, even if Perms says otherwise for the final path.
const auto NewDirectoryPerms = ResolvedPerms | sys::fs::owner_all;
+
+ StringRef Name = *I;
while (true) {
- StringRef Name = *I;
- detail::InMemoryNode *Node = Dir->getChild(Name);
+ Name = *I;
++I;
+ if (I == E)
+ break;
+ detail::InMemoryNode *Node = Dir->getChild(Name);
if (!Node) {
- if (I == E) {
- // End of the path.
- Dir->addChild(
- Name, MakeNode({Dir->getUniqueID(), Path, Name, ModificationTime,
- std::move(Buffer), ResolvedUser, ResolvedGroup,
- ResolvedType, ResolvedPerms}));
- return true;
- }
-
- // Create a new directory. Use the path up to here.
+ // This isn't the last element, so we create a new directory.
Status Stat(
StringRef(Path.str().begin(), Name.end() - Path.str().begin()),
getDirectoryID(Dir->getUniqueID(), Name),
@@ -891,27 +886,33 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
Name, std::make_unique<detail::InMemoryDirectory>(std::move(Stat))));
continue;
}
+ // Creating file under another file.
+ if (!isa<detail::InMemoryDirectory>(Node))
+ return false;
+ Dir = cast<detail::InMemoryDirectory>(Node);
+ }
+ detail::InMemoryNode *Node = Dir->getChild(Name);
+ if (!Node) {
+ Dir->addChild(Name,
+ MakeNode({Dir->getUniqueID(), Path, Name, ModificationTime,
+ std::move(Buffer), ResolvedUser, ResolvedGroup,
+ ResolvedType, ResolvedPerms}));
+ return true;
+ }
+ if (isa<detail::InMemoryDirectory>(Node))
+ return ResolvedType == sys::fs::file_type::directory_file;
- if (auto *NewDir = dyn_cast<detail::InMemoryDirectory>(Node)) {
- Dir = NewDir;
- } else {
- assert((isa<detail::InMemoryFile>(Node) ||
- isa<detail::InMemoryHardLink>(Node)) &&
- "Must be either file, hardlink or directory!");
-
- // Trying to insert a directory in place of a file.
- if (I != E)
- return false;
+ assert((isa<detail::InMemoryFile>(Node) ||
+ isa<detail::InMemoryHardLink>(Node)) &&
+ "Must be either file, hardlink or directory!");
- // Return false only if the new file is different from the existing one.
- if (auto Link = dyn_cast<detail::InMemoryHardLink>(Node)) {
- return Link->getResolvedFile().getBuffer()->getBuffer() ==
- Buffer->getBuffer();
- }
- return cast<detail::InMemoryFile>(Node)->getBuffer()->getBuffer() ==
- Buffer->getBuffer();
- }
+ // Return false only if the new file is different from the existing one.
+ if (auto *Link = dyn_cast<detail::InMemoryHardLink>(Node)) {
+ return Link->getResolvedFile().getBuffer()->getBuffer() ==
+ Buffer->getBuffer();
}
+ return cast<detail::InMemoryFile>(Node)->getBuffer()->getBuffer() ==
+ Buffer->getBuffer();
}
bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 4b2ce0d73949..5708b6173750 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -85,6 +85,10 @@ def SMEUnsupported : AArch64Unsupported {
SME2Unsupported.F);
}
+def MTEUnsupported : AArch64Unsupported {
+ let F = [HasMTE];
+}
+
let F = [HasPAuth, HasPAuthLR] in
def PAUnsupported : AArch64Unsupported;
@@ -109,6 +113,7 @@ include "AArch64SchedNeoverseN1.td"
include "AArch64SchedNeoverseN2.td"
include "AArch64SchedNeoverseV1.td"
include "AArch64SchedNeoverseV2.td"
+include "AArch64SchedOryon.td"
include "AArch64Processors.td"
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 32646c6ee689..941990c53c4a 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -589,6 +589,14 @@ def CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0
(sequence "X%u",19, 28),
LR, FP)>;
+// SME ABI support routines such as __arm_get_current_vg preserve most registers.
+def CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1
+ : CalleeSavedRegs<(add (sequence "Z%u", 0, 31),
+ (sequence "P%u", 0, 15),
+ (sequence "X%u", 1, 15),
+ (sequence "X%u",19, 28),
+ LR, FP)>;
+
// SME ABI support routines __arm_sme_state preserves most registers.
def CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2
: CalleeSavedRegs<(add (sequence "Z%u", 0, 31),
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 8d16709114df..a759efcd9441 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -617,6 +617,27 @@ def TuneAmpere1B : SubtargetFeature<"ampere1b", "ARMProcFamily", "Ampere1B",
FeatureLdpAlignedOnly,
FeatureStpAlignedOnly]>;
+def TuneOryon : SubtargetFeature<"oryon-1", "ARMProcFamily",
+ "Oryon",
+ "Nuvia Inc Oryon processors", [
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureFuseAES,
+ FeatureFuseAdrpAdd,
+ FeatureEnableSelectOptimize,
+ FeatureFuseCryptoEOR,
+ FeatureFuseAddress,
+ FeatureSM4,
+ FeatureSHA2,
+ FeatureSHA3,
+ FeatureAES,
+ FeatureFullFP16,
+ FeatureFP16FML,
+ FeaturePerfMon,
+ FeatureSPE,
+ FeaturePostRAScheduler,
+ HasV8_6aOps]>;
def ProcessorFeatures {
list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
@@ -806,6 +827,11 @@ def ProcessorFeatures {
FeatureSHA3, FeatureAES, FeatureCSSC,
FeatureWFxT, FeatureFullFP16];
+ list<SubtargetFeature> Oryon = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
+ FeatureCrypto, FeatureRandGen,
+ FeaturePAuth, FeatureSM4, FeatureSHA2,
+ FeatureSHA3, FeatureAES];
+
// ETE and TRBE are future architecture extensions. We temporarily enable them
// by default for users targeting generic AArch64. The extensions do not
// affect code generated by the compiler and can be used only by explicitly
@@ -988,3 +1014,7 @@ def : ProcessorModel<"ampere1a", Ampere1Model, ProcessorFeatures.Ampere1A,
def : ProcessorModel<"ampere1b", Ampere1BModel, ProcessorFeatures.Ampere1B,
[TuneAmpere1B]>;
+
+// Qualcomm Oryon
+def : ProcessorModel<"oryon-1", OryonModel, ProcessorFeatures.Oryon,
+ [TuneOryon]>;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index e97d7e3b6ed8..cc50b59dd8d7 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -107,13 +107,22 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
report_fatal_error(
- "Calling convention AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 is "
- "only supported to improve calls to SME ACLE save/restore/disable-za "
+ "Calling convention "
+ "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 is only "
+ "supported to improve calls to SME ACLE save/restore/disable-za "
"functions, and is not intended to be used beyond that scope.");
if (MF->getFunction().getCallingConv() ==
+ CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
+ report_fatal_error(
+ "Calling convention "
+ "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1 is "
+ "only supported to improve calls to SME ACLE __arm_get_current_vg "
+ "function, and is not intended to be used beyond that scope.");
+ if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
report_fatal_error(
- "Calling convention AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 is "
+ "Calling convention "
+ "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 is "
"only supported to improve calls to SME ACLE __arm_sme_state "
"and is not intended to be used beyond that scope.");
if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering()
@@ -153,13 +162,22 @@ AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const {
if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
report_fatal_error(
- "Calling convention AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 is "
+ "Calling convention "
+ "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 is "
"only supported to improve calls to SME ACLE save/restore/disable-za "
"functions, and is not intended to be used beyond that scope.");
if (MF->getFunction().getCallingConv() ==
+ CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
+ report_fatal_error(
+ "Calling convention "
+ "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1 is "
+ "only supported to improve calls to SME ACLE __arm_get_current_vg "
+ "function, and is not intended to be used beyond that scope.");
+ if (MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
report_fatal_error(
- "Calling convention AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 is "
+ "Calling convention "
+ "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 is "
"only supported to improve calls to SME ACLE __arm_sme_state "
"and is not intended to be used beyond that scope.");
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
@@ -236,6 +254,8 @@ AArch64RegisterInfo::getDarwinCallPreservedMask(const MachineFunction &MF,
"Calling convention SVE_VectorCall is unsupported on Darwin.");
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0_RegMask;
+ if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
+ return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1_RegMask;
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2_RegMask;
if (CC == CallingConv::CFGuard_Check)
@@ -282,6 +302,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
: CSR_AArch64_SVE_AAPCS_RegMask;
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0_RegMask;
+ if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
+ return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1_RegMask;
if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2_RegMask;
if (CC == CallingConv::CFGuard_Check)
@@ -643,6 +665,7 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
case CallingConv::AArch64_VectorCall:
case CallingConv::AArch64_SVE_VectorCall:
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0:
+ case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1:
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2:
if (STI.isTargetWindows())
return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
diff --git a/llvm/lib/Target/AArch64/AArch64SchedOryon.td b/llvm/lib/Target/AArch64/AArch64SchedOryon.td
new file mode 100644
index 000000000000..09d1af248f0e
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedOryon.td
@@ -0,0 +1,1659 @@
+//=- AArch64SchedOryon.td - Qualcomm Oryon CPU 001 ---*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for Qualcomm Oryon
+// family of processors.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pipeline Description.
+
+def OryonModel : SchedMachineModel {
+ let IssueWidth = 14;
+ let MicroOpBufferSize = 376;
+ let LoadLatency = 4;
+ let MispredictPenalty = 13; // 13 cycles for mispredicted branch.
+ let LoopMicroOpBufferSize = 0; // Do not have a LoopMicroOpBuffer
+ let PostRAScheduler = 1; // Using PostRA sched.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ SMEUnsupported.F,
+ MTEUnsupported.F,
+ PAUnsupported.F,
+ [HasPAuth, HasCSSC]);
+}
+
+let SchedModel = OryonModel in {
+
+// Issue ports.
+// IXU has 6 ports p0 ~ p5
+// LSU has 4 ports p6 ~ p9(ls0 ~ ls3), p10/p11(std0, std1) has to work with ls0~ls3
+// VXU has 4 ports p12 ~ p15
+
+// cross IXU/LSU/VXU resource group for FMOV P41 of VXU
+// I2V
+def ORYONI4FP0 : ProcResource<1>;
+def ORYONI5FP1 : ProcResource<1>;
+// V2I
+def ORYONFP0I4 : ProcResource<1>;
+def ORYONFP1I5 : ProcResource<1>;
+
+// store 1 for normal store instructions
+def ORYONST0 : ProcResource<1>;
+// store 2 for normal store instructions
+def ORYONST1 : ProcResource<1>;
+
+// Port 0: ALU/Indirect/Direct Branch.
+def ORYONP0 : ProcResource<1>;
+
+// Port 1: ALU/Direct Branch.
+def ORYONP1 : ProcResource<1>;
+
+// Port 2: ALU.
+def ORYONP2 : ProcResource<1>;
+
+// Port 3: ALU.
+def ORYONP3 : ProcResource<1>;
+
+// Port 4: ALU.
+def ORYONP4 : ProcResource<1> {
+ let Super = ORYONI4FP0;
+ let Super = ORYONFP0I4; }
+
+// Port 5: ALU.
+def ORYONP5 : ProcResource<1> {
+ let Super = ORYONI5FP1;
+ let Super = ORYONFP1I5; }
+
+// Port 6: Load/Store. LS0
+def ORYONP6 : ProcResource<1> {
+ let Super = ORYONST0; }
+
+// Port 7: Load/store. LS1
+def ORYONP7 : ProcResource<1> {
+ let Super = ORYONST0; }
+
+// Port 8: Load/Store. LS2
+def ORYONP8 : ProcResource<1> {
+ let Super = ORYONST1; }
+
+// Port 9: Load/store. LS3
+def ORYONP9 : ProcResource<1> {
+ let Super = ORYONST1; }
+
+// Port 10: Load/Store. STD0
+def ORYONP10SD0 : ProcResource<1> {
+ let Super = ORYONST0; }
+
+// Port 11: Load/store. STD1
+def ORYONP11SD1 : ProcResource<1> {
+ let Super = ORYONST1; }
+
+// Port 12: FP/Neon/SIMD/Crypto.
+def ORYONP12FP0 : ProcResource<1> {
+ let Super = ORYONI4FP0;
+ let Super = ORYONFP0I4; }
+
+// Port 13: FP/Neon/SIMD/Crypto.
+def ORYONP13FP1 : ProcResource<1> {
+ let Super = ORYONI5FP1;
+ let Super = ORYONFP1I5; }
+
+// Port 14: FP/Neon/SIMD/Crypto.
+def ORYONP14FP2 : ProcResource<1>;
+
+// Port 15: FP/Neon/SIMD/Crypto.
+def ORYONP15FP3 : ProcResource<1>;
+
+// Define groups for the functional units on each issue port. Each group
+// created will be used by a WriteRes.
+
+// Integer add/shift/logical/misc. instructions on port I0/I1/I2/I3/I4/I5.
+def ORYONI012345 : ProcResGroup<[ORYONP0, ORYONP1, ORYONP2,
+ ORYONP3, ORYONP4, ORYONP5]> {
+ let BufferSize = 120;
+}
+
+// Direct Conditional Branch instructions on ports I0/I1.
+def ORYONI01 : ProcResGroup<[ORYONP0, ORYONP1]> {
+ let BufferSize = 40;
+}
+
+// Indirect/crypto Conditional Branch instructions on ports I0.
+def ORYONI0 : ProcResGroup<[ORYONP0]> {
+ let BufferSize = 20;
+}
+
+// Crypto/CRC/PAU instructions on ports I2.
+def ORYONI2 : ProcResGroup<[ORYONP2]> {
+ let BufferSize = 20;
+}
+
+// Multiply/Multiply-ADD instructions on ports I4/I5.
+def ORYONI45 : ProcResGroup<[ORYONP4, ORYONP5]> {
+ let BufferSize = 40;
+}
+
+// Divide instructions on ports I5.
+def ORYONI5 : ProcResGroup<[ORYONP5]> {
+ let BufferSize = 20;
+}
+
+// Comparison instructions on ports I0/I1/I2/I3.
+def ORYONI0123 : ProcResGroup<[ORYONP0, ORYONP1,
+ ORYONP2, ORYONP3]> {
+ let BufferSize = 80;
+}
+
+// Load instructions on ports P6/P7/P8/P9.
+def ORYONLD : ProcResGroup<[ORYONP6, ORYONP7, ORYONP8, ORYONP9]> {
+ let BufferSize = 64;
+}
+
+// Store instructions on combo of STA/STD pipes
+def ORYONST : ProcResGroup<[ORYONST0, ORYONST1]> {
+ let BufferSize = 64;
+}
+
+// Arithmetic and CRYP-AED ASIMD/FP instructions on ports FP0/FP1/FP2/FP3.
+def ORYONFP0123 : ProcResGroup<[ORYONP12FP0, ORYONP13FP1,
+ ORYONP14FP2, ORYONP15FP3]> {
+ let BufferSize = 192;
+}
+
+// FP Comparison and F/I move instructions on ports FP0/FP1.
+def ORYONFP01 : ProcResGroup<[ORYONP12FP0, ORYONP13FP1]> {
+ let BufferSize = 96;
+}
+
+// FDIV instructions on ports FP3.
+def ORYONFP3 : ProcResGroup<[ORYONP15FP3]> {
+ let BufferSize = 48;
+}
+
+// CRYP-SHA instructions on ports FP1.
+// Fix: this group previously referenced ORYONP14FP2 (the FP2 pipe), which
+// contradicted this comment and duplicated the ORYONFP2 group defined just
+// below. Port 13 (ORYONP13FP1) is the FP1 pipe, matching the port list at
+// the top of the model.
+def ORYONFP1 : ProcResGroup<[ORYONP13FP1]> {
+  let BufferSize = 48;
+}
+
+def ORYONFP2 : ProcResGroup<[ORYONP14FP2]> {
+ let BufferSize = 48;
+}
+
+// Reciprocal, Square root on FP0.
+def ORYONFP0 : ProcResGroup<[ORYONP12FP0]> {
+ let BufferSize = 48;
+}
+
+// cross IXU/LSU/VXU resource group for FMOV P41 of VXU
+// I2V
+def ORYONI2V : ProcResGroup<[ORYONI4FP0, ORYONI5FP1]> {
+ let BufferSize = 40;
+}
+
+// V2I
+def ORYONV2I : ProcResGroup<[ORYONFP0I4, ORYONFP1I5]> {
+ let BufferSize = 96;
+}
+
+// Define commonly used write types for InstRW specializations.
+// All definitions follow the format: ORYONWrite_<NumCycles>Cyc_<Resources>.
+
+// Because of the complexity of Oryon CPU, we skip the following
+// generic definitions and define each instruction specifically
+
+// These WriteRes entries are not used in the Oryon sched model.
+def : WriteRes<WriteImm, []> { let Unsupported = 1; }
+def : WriteRes<WriteI, []> { let Unsupported = 1; }
+def : WriteRes<WriteISReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteIEReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteExtr, []> { let Unsupported = 1; }
+def : WriteRes<WriteIS, []> { let Unsupported = 1; }
+def : WriteRes<WriteID32, []> { let Unsupported = 1; }
+def : WriteRes<WriteID64, []> { let Unsupported = 1; }
+def : WriteRes<WriteIM32, []> { let Unsupported = 1; }
+def : WriteRes<WriteIM64, []> { let Unsupported = 1; }
+def : WriteRes<WriteBr, []> { let Unsupported = 1; }
+def : WriteRes<WriteBrReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteLD, []> { let Unsupported = 1; }
+def : WriteRes<WriteST, []> { let Unsupported = 1; }
+def : WriteRes<WriteSTP, []> { let Unsupported = 1; }
+def : WriteRes<WriteAdr, []> { let Unsupported = 1; }
+def : WriteRes<WriteLDIdx, []> { let Unsupported = 1; }
+def : WriteRes<WriteSTIdx, []> { let Unsupported = 1; }
+def : WriteRes<WriteF, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCmp, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCvt, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCopy, []> { let Unsupported = 1; }
+def : WriteRes<WriteFImm, []> { let Unsupported = 1; }
+def : WriteRes<WriteFMul, []> { let Unsupported = 1; }
+def : WriteRes<WriteFDiv, []> { let Unsupported = 1; }
+def : WriteRes<WriteVd, []> { let Unsupported = 1; }
+def : WriteRes<WriteVq, []> { let Unsupported = 1; }
+def : WriteRes<WriteVLD, []> { let Unsupported = 1; }
+def : WriteRes<WriteVST, []> { let Unsupported = 1; }
+def : WriteRes<WriteSys, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Unsupported = 1; }
+def : WriteRes<WriteHint, []> { let Unsupported = 1; }
+def : WriteRes<WriteLDHi, []> { let Unsupported = 1; }
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// These ReadAdvance entries will be defined in later implementation
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
+
+
+//IXU resource definition
+// 1 cycle, no pipe resource consumed
+def ORYONWrite_1Cyc_NONE : SchedWriteRes<[]>;
+
+// 1 cycles on I01.
+def ORYONWrite_1Cyc_I01 : SchedWriteRes<[ORYONI01]>;
+
+def ORYONWrite_1Cyc_2Uops_I01 : SchedWriteRes<[ORYONI01]> {
+ let NumMicroOps = 2;
+}
+
+def ORYONWrite_1Cyc_I0 : SchedWriteRes<[ORYONI0]>;
+
+// 7 cycles on I2. PAC*/AUT* instructions
+def ORYONWrite_7Cyc_I2 : SchedWriteRes<[ORYONI2]> {
+ let Latency = 7;
+}
+
+// 7 cycles on I2. PAC*/AUT* instructions
+def ORYONWrite_7Cyc_3Uops_I2 : SchedWriteRes<[ORYONI2]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+// 9 (7+1+1) cycles on I2 and I0/I1, I0. Authentication branch instructions
+// these instructions are broken down to three uops
+// a. PtrAuth on pipe 2 taking 7 cycles
+// b. Link Register Update on pipes 0 and 1 taking 1 cycle
+// c. Indirect branch on pipe 0 taking 1 cycle
+
+def ORYONWrite_9Cyc_I012 : SchedWriteRes<[ORYONI2, ORYONI01]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+
+// 3 cycles on I2. CRC32 and CRC32C instructions
+def ORYONWrite_3Cyc_I2 : SchedWriteRes<[ORYONI2]> {
+ let Latency = 3;
+}
+
+// 1 cycle on I012345
+def ORYONWrite_1Cyc_I012345 : SchedWriteRes<[ORYONI012345]>;
+
+// 1 cycle on I0123
+def ORYONWrite_1Cyc_I0123 : SchedWriteRes<[ORYONI0123]>;
+
+// 1 cycle on 2 of I012345
+def ORYONWrite_1Cyc_I012345_I012345 :
+SchedWriteRes<[ORYONI012345, ORYONI012345]> ;
+
+// 2 cycle on 2 of I0123 with ReleaseAtCycles
+def ORYONWrite_2Cyc_I0123_I0123_RC :
+SchedWriteRes<[ORYONI0123, ORYONI0123]> {
+ let Latency = 2;
+ let ReleaseAtCycles = [2,2];
+}
+
+// 2 cycle on 2 of I012345
+def ORYONWrite_2Cyc_I012345_I012345_RC :
+SchedWriteRes<[ORYONI012345, ORYONI012345]> {
+ let Latency = 2;
+ let ReleaseAtCycles = [2,2];
+}
+
+// 3 cycle on 2 of I45
+def ORYONWrite_3Cyc_I45_I45_RC :
+SchedWriteRes<[ORYONI45, ORYONI45]> {
+ let Latency = 3;
+ let ReleaseAtCycles = [2,2];
+}
+
+// 3 cycle on I45
+def ORYONWrite_3Cyc_I45 : SchedWriteRes<[ORYONI45]> {
+ let Latency = 3;
+}
+
+// 7 cycle on I2 32-bit integer division
+def ORYONWrite_7Cyc_I2_RC : SchedWriteRes<[ORYONI2]> {
+ let Latency = 7;
+ let ReleaseAtCycles = [2];
+}
+
+// 9 cycle on I2 64-bit integer division
+def ORYONWrite_9Cyc_I2_RC : SchedWriteRes<[ORYONI2]> {
+ let Latency = 9;
+ let ReleaseAtCycles = [2];
+}
+
+// LSU resource definition
+// need to define WriteLDAdr, WriteAdrAdr, WriteLDHi, WriteSTX
+// 4 cycle on LS(P6789)
+def ORYONWrite_4Cyc_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 4;
+}
+
+// 4 cycle for Post/Pre inc/dec access, also covers all pair loads Post/Pre
+def ORYONWrite_4Cyc_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 4;
+}
+
+// 5 (4+1) for VXU SIMD access/could also include FP
+// resource might not be correct, as VXU resource not included
+def ORYONWrite_5Cyc_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+}
+
+def ORYONWrite_5Cyc_2Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def ORYONWrite_5Cyc_3Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+def ORYONWrite_5Cyc_4Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+
+def ORYONWrite_5Cyc_5Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 5;
+}
+
+def ORYONWrite_5Cyc_6Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 6;
+}
+
+def ORYONWrite_5Cyc_8Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 8;
+}
+
+def ORYONWrite_5Cyc_10Uops_LD : SchedWriteRes<[ORYONLD]> {
+ let Latency = 5;
+ let NumMicroOps = 10;
+}
+
+// 5 cycle for Post/Pre inc/dec access; the writeback micro-op also uses the
+// integer pipes (I012345) alongside the load pipes.
+def ORYONWrite_5Cyc_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+  let Latency = 5;
+}
+
+def ORYONWrite_5Cyc_2Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def ORYONWrite_5Cyc_3Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+def ORYONWrite_5Cyc_4Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+
+def ORYONWrite_5Cyc_5Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 5;
+}
+
+def ORYONWrite_5Cyc_6Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 6;
+}
+
+def ORYONWrite_5Cyc_8Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 8;
+}
+
+def ORYONWrite_5Cyc_10Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> {
+ let Latency = 5;
+ let NumMicroOps = 10;
+}
+
+// 1 cycle for all generic stores
+def ORYONWrite_1Cyc_ST : SchedWriteRes<[ORYONST]>;
+
+def ORYONWrite_1Cyc_2Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 2;
+}
+
+def ORYONWrite_1Cyc_3Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 3;
+}
+
+def ORYONWrite_1Cyc_4Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 4;
+}
+
+def ORYONWrite_1Cyc_5Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 5;
+}
+
+def ORYONWrite_1Cyc_6Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 6;
+}
+
+def ORYONWrite_1Cyc_8Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 8;
+}
+
+def ORYONWrite_1Cyc_10Uops_ST : SchedWriteRes<[ORYONST]> {
+ let NumMicroOps = 10;
+}
+
+// 1 cycle for neon write: float + ASIMD with Post/Pre Inc/Dec access
+// also includes Pair store until further informed
+// NOTE(review): NumMicroOps = 3 duplicates ORYONWrite_1Cyc_3Uops_ST_I012345
+// below and breaks the <N>Uops naming convention used by every sibling
+// (the unsuffixed ORYONWrite_1Cyc_ST above takes the default of 1).
+// Confirm whether 3 is intentional here.
+def ORYONWrite_1Cyc_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+  let NumMicroOps = 3;
+}
+
+def ORYONWrite_1Cyc_2Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 2;
+}
+
+def ORYONWrite_1Cyc_3Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 3;
+}
+
+def ORYONWrite_1Cyc_4Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 4;
+}
+
+def ORYONWrite_1Cyc_5Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 5;
+}
+
+def ORYONWrite_1Cyc_6Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 6;
+}
+
+def ORYONWrite_1Cyc_8Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 8;
+}
+
+def ORYONWrite_1Cyc_10Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> {
+ let NumMicroOps = 10;
+}
+
+// VXU resource definition
+
+// I2V instruction has 1 uOp
+// I2v with convert has 2 uOps
+// all I2V, V2I's throughputs are 2
+// On VXU doc, p37 -- latencies and throughput
+// P41, resource taken, P42, uOps
+def ORYONWrite_I2V_4Cyc_I45 : SchedWriteRes<[ORYONI2V]> {
+ let Latency = 4;
+}
+
+// inline a FCVT, so add one more uOp
+def ORYONWrite_I2V_7Cyc_I45 : SchedWriteRes<[ORYONI2V]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+// V2I move instruction has 1/2 uOps, P42 in VXU doc
+// Latency is 3, FCVT is also 3 cycle
+// move + convert is 6 (3+3) cycles
+// throughput is 2
+def ORYONWrite_V2I_3Cyc_FP01 : SchedWriteRes<[ORYONV2I]> {
+ let Latency = 3;
+}
+
+// inline a FCVT, so add one more uOp
+def ORYONWrite_V2I_6Cyc_FP01 : SchedWriteRes<[ORYONV2I]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def ORYONWrite_V2V_2Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 2;
+}
+
+def ORYONWrite_V2V_3Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 3;
+}
+
+def ORYONWrite_V2V_6Cyc_FP01 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def ORYONWrite_4Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 4;
+}
+
+def ORYONWrite_3Cyc_FP0 : SchedWriteRes<[ORYONFP0]> {
+ let Latency = 3;
+}
+
+def ORYONWrite_3Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 3;
+}
+
+def ORYONWrite_3Cyc_2Uops_FP0123 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def ORYONWrite_2Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 2;
+}
+
+def ORYONWrite_2Cyc_FP01 : SchedWriteRes<[ORYONFP01]> {
+ let Latency = 2;
+}
+
+// 2 cycle on FP1
+def ORYONWrite_2Cyc_FP1 : SchedWriteRes<[ORYONFP1]> {
+ let Latency = 2;
+}
+
+// 3 cycle on FP1
+def ORYONWrite_3Cyc_FP1 : SchedWriteRes<[ORYONFP1]> {
+ let Latency = 3;
+}
+
+// 4 cycle on FP1.
+// NOTE(review): the original comment claimed "0.5 throughput", but
+// ReleaseAtCycles = [4] models one issue per 4 cycles (0.25/cycle).
+// Confirm whether the comment or the release count is the intended value.
+def ORYONWrite_4Cyc_FP1_RC4 : SchedWriteRes<[ORYONFP1]> {
+  let Latency = 4;
+  let ReleaseAtCycles = [4];
+}
+
+// 5 cycle , 1 throughput on FP1
+def ORYONWrite_5Cyc_FP1 : SchedWriteRes<[ORYONFP1]> {
+ let Latency = 5;
+}
+
+// 8 cycle , 2 throughput on FP0123
+def ORYONWrite_8Cyc_FP0123_RC : SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 8;
+ let ReleaseAtCycles = [2];
+}
+
+def ORYONWrite_6Cyc_FP3 : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 6;
+}
+
+def ORYONWrite_7Cyc_FP3 : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 7;
+}
+
+def ORYONWrite_8Cyc_FP3 : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 8;
+}
+
+def ORYONWrite_9Cyc_FP3 : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 9;
+}
+
+def ORYONWrite_10Cyc_FP3 : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 10;
+}
+
+def ORYONWrite_8Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 8;
+ let ReleaseAtCycles = [2];
+}
+
+def ORYONWrite_10Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 10;
+ let ReleaseAtCycles = [2];
+}
+
+def ORYONWrite_13Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> {
+ let Latency = 13;
+ let ReleaseAtCycles = [2];
+}
+
+def ORYONWrite_4Cyc_FP0123_RC :
+SchedWriteRes<[ORYONFP0123]> {
+ let Latency = 4;
+ let ReleaseAtCycles = [2];
+}
+
+def ORYONWrite_4Cyc_FP0123_FP0123_RC :
+SchedWriteRes<[ORYONFP0123, ORYONFP0123]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2,2];
+}
+
+def ORYONWrite_4Cyc_FP0123_FP0123_FP0123_RC :
+SchedWriteRes<[ORYONFP0123, ORYONFP0123, ORYONFP0123]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ReleaseAtCycles = [3,3,3];
+}
+
+def ORYONWrite_6Cyc_FP0123_FP0123_FP0123_FP0123_RC :
+SchedWriteRes<[ORYONFP0123, ORYONFP0123, ORYONFP0123, ORYONFP0123]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+ let ReleaseAtCycles = [6,6,6,6];
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Tables in IXU
+//===----------------------------------------------------------------------===//
+
+//---
+// Arithmetic Instructions
+//---
+
+//1, 1, 6
+def : InstRW<[ORYONWrite_1Cyc_I012345],
+ (instregex "^ADD(W|X)r(i|r|x)", "^SUB(W|X)r(i|r|x)")>;
+
+//2,2,3
+def : InstRW<[ORYONWrite_2Cyc_I012345_I012345_RC],
+ (instregex "^ADD(W|X)rs", "^SUB(W|X)rs")>;
+
+//1,1,4 alias CMP, CMN on page 75
+def : InstRW<[ORYONWrite_1Cyc_I0123],
+ (instregex "^ADDS(W|X)r(i|r|x)(64)?", "^SUBS(W|X)r(i|r|x)")>;
+
+//2,2,2 alias CMP, CMN on page 75
+def : InstRW<[ORYONWrite_2Cyc_I0123_I0123_RC],
+ (instregex "^ADDS(W|X)rs", "^SUBS(W|X)rs")>;
+
+//1,1,4
+def : InstRW<[ORYONWrite_1Cyc_I0123],
+ (instregex "^ADC(W|X)r","^SBC(W|X)r",
+ "^ADCS(W|X)r","^SBCS(W|X)r")>;
+
+//1,1,2
+def : InstRW<[ORYONWrite_1Cyc_2Uops_I01],
+ (instrs ADR,ADRP)>;
+
+//1,1,4
+def : InstRW<[ORYONWrite_1Cyc_I0123],
+ (instregex "^CSEL(W|X)r", "^CSINV(W|X)r",
+ "^CSNEG(W|X)r", "^CSINC(W|X)r")>;
+
+//---
+//Compare Instruction
+//---
+
+// We have CCMP, CCMN as LLVM DAG node
+// CMP is an alias of SUBS as above
+// CMN is an alias of ADDS as above
+// We also have no way to get shift compare node in LLVM
+//2,2,1.5 CMP, CMN
+
+//1,1,4
+def : InstRW<[ORYONWrite_1Cyc_I0123],
+ (instregex "^CCMP(W|X)(i|r)", "^CCMN(W|X)(i|r)")>;
+
+//---
+// Branch
+//---
+
+def : InstRW<[ORYONWrite_1Cyc_NONE], (instrs B)>;
+def : InstRW<[ORYONWrite_1Cyc_I01], (instrs BL)>;
+def : InstRW<[ORYONWrite_1Cyc_I01],
+ (instrs Bcc, CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>;
+def : InstRW<[ORYONWrite_1Cyc_I0], (instrs BR, BLR)>;
+def : InstRW<[ORYONWrite_1Cyc_I0], (instrs RET)>;
+
+// 3 uOp, 1 cycle for branch, 7 cycle for Authentication,
+// 1 cycle for updating link register
+// V8.3a PAC
+def : InstRW<[ORYONWrite_9Cyc_I012],
+ (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ,
+ BRAA, BRAAZ, BRAB, BRABZ)>;
+def : InstRW<[ORYONWrite_9Cyc_I012], (instrs RETAA, RETAB, ERETAA, ERETAB)>;
+
+def : InstRW<[ORYONWrite_7Cyc_3Uops_I2], (instregex "^LDRAA", "^LDRAB")>;
+
+//---
+// Logical Instructions
+//---
+
+//1,1,4 TST is an alias of ANDS
+def : InstRW<[ORYONWrite_1Cyc_I0123],
+ (instregex "^ANDS(W|X)r(i|r|x)", "^BICS(W|X)r(i|r|x)")>;
+
+//2,2,2 TST shift is an alias
+def : InstRW<[ORYONWrite_2Cyc_I0123_I0123_RC],
+ (instregex "^ANDS(W|X)rs", "^BICS(W|X)rs")>;
+
+//1,1,6
+def : InstRW<[ORYONWrite_1Cyc_I012345],
+ (instregex "^AND(W|X)r(i|r|x)", "^EOR(W|X)r(i|r|x)",
+ "^ORR(W|X)r(i|r|x)", "^BIC(W|X)r(i|r|x)",
+ "^EON(W|X)r(i|r|x)", "^ORN(W|X)r(i|r|x)")>;
+
+//2,2,3
+def : InstRW<[ORYONWrite_2Cyc_I012345_I012345_RC],
+ (instregex "^AND(W|X)rs", "^EOR(W|X)rs", "^ORR(W|X)rs",
+ "^BIC(W|X)rs", "^EON(W|X)rs", "^ORN(W|X)rs")>;
+
+
+//---
+// Shift Instructions
+//---
+
+//1,1,6
+def : InstRW<[ORYONWrite_1Cyc_I012345],
+ (instregex "^ASRV(W|X)r", "^LSLV(W|X)r",
+ "^LSRV(W|X)r", "^RORV(W|X)r",
+ "RMIF")>;
+
+//---
+// Move-Data Bit-field and Sign_Extension Instructions
+//---
+
+//1,1,6
+def : InstRW<[ORYONWrite_1Cyc_I012345],
+ (instregex "^MOVK(W|X)i", "^MOVN(W|X)i",
+ "^MOVZ(W|X)i", "^SBFM(W|X)ri",
+ "^UBFM(W|X)ri", "^BFM(W|X)ri",
+ "^SXT(W|B|H|X)", "^UXT(H|B)")>;
+
+// COPY instruction is an LLVM internal DAG node, needs further study
+def : InstRW<[ORYONWrite_1Cyc_I012345], (instrs COPY)>;
+
+//---
+// Reverse Instructions
+//---
+
+//1,1,6
+def : InstRW<[ORYONWrite_1Cyc_I012345],
+ (instregex "^RBIT(W|X)r", "^REV(16|32|64)?(W|X)r")>;
+
+
+//---
+// Flag Manipulate Instructions
+//---
+
+//1,1,4
+def : InstRW<[ORYONWrite_1Cyc_I0123],
+ (instregex "^SETF8", "^SETF16", "^CFINV")>;
+
+//---
+// Miscellaneous Instructions
+//---
+
+//1,1,6
+def : InstRW<[ORYONWrite_1Cyc_I012345],
+ (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$", "^EXTR(W|X)rri")>;
+
+
+//---
+// Multiply Instructions
+//---
+
+//1,3,2
+def : InstRW<[ORYONWrite_3Cyc_I45],
+ (instregex "^MADD(W|X)rrr", "^MSUB(W|X)rrr",
+ "^(S|U)MADDLrrr", "^(S|U)MSUBLrrr",
+ "^(S|U)MULHrr")>;
+
+//---
+// Divide Instructions
+//---
+
+def : InstRW<[ORYONWrite_7Cyc_I2_RC],
+ (instregex "^(S|U)DIVWr")>;
+
+def : InstRW<[ORYONWrite_9Cyc_I2_RC],
+ (instregex "^(S|U)DIVXr")>;
+
+
+//---
+// Cryptography Instructions
+//
+//1,3,1 on I2
+def : InstRW<[ORYONWrite_3Cyc_I2],
+ (instregex "^CRC32(B|H|W|X)rr", "^CRC32C(B|H|W|X)rr")>;
+
+//---
+// PAU instructions
+//---
+
+// on p47 of IXU document, we have 7 cycles for all PAU instructions
+// here we just assume all signing and pauth instructions are 7 cycles
+// assume all are 7 cycles here
+
+// signing instructions
+def : InstRW<[ORYONWrite_7Cyc_I2], (instrs PACIA, PACIB,
+ PACDA, PACDB,
+ PACIZA, PACIZB,
+ PACDZA, PACDZB,
+ PACGA)>;
+// authentication instructions
+def : InstRW<[ORYONWrite_7Cyc_I2], (instrs AUTIA, AUTIB,
+ AUTDA, AUTDB,
+ AUTIZA, AUTIZB,
+ AUTDZA, AUTDZB)>;
+def : InstRW<[ORYONWrite_7Cyc_I2], (instrs XPACI, XPACD)>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Tables in LSU
+//===----------------------------------------------------------------------===//
+
+// 4 cycle Load-to-use from L1D$
+// Neon load with 5 cycle
+// 6 cycle to STA ?
+// STD cycle ?
+// NEON STD + 2
+
+// Load Instructions
+// FP Load Instructions
+
+// Load pair, immed pre-index, normal
+// Load pair, immed pre-index, signed words
+// Load pair, immed post-index, normal
+// Load pair, immed post-index, signed words
+// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
+
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPDi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPQi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPSi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPXi)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPDi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPQi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPSi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPSWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPXi)>;
+
+// Register loads, unsigned-immediate addressing: 4-cycle load-to-use.
+// NOTE(review): LDRWui and LDRXui are absent from this list even though the
+// matching stores (STRWui/STRXui) are covered below — confirm the integer
+// unsigned-immediate forms are matched elsewhere in the model.
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBui)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDui)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHui)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQui)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSui)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDl)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQl)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWl)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXl)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRBi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRHi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRXi)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSBWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSBXi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSHWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSHXi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSWi)>;
+
+// Pair loads, immed pre-index: 4-cycle load plus writeback on the integer
+// pipes.
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPDpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPQpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPSpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPWpre)>;
+// Fix: LDPXpre was missing even though every other pre-index pair load is
+// listed here and the post-index list below includes LDPXpost.
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPXpre)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRDpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRQpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRWpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRXpre)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBWpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBXpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBWpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBXpost)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHWpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHXpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHWpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHXpost)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBBpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBBpost)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHHpre)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHHpost)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPDpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPQpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPSpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPWpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
+ (instrs LDPXpost)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRDpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRQpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRWpost)>;
+def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRXpost)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHHroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHWroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHXroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWroW)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXroW)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHHroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHWroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHXroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWroX)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXroX)>;
+
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURBi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURBBi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURDi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURHi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURHHi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURQi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURXi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSBWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSBXi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSHWi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSHXi)>;
+def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSWi)>;
+
+
+
+// Store register, immed post-index
+// NOTE: Handled by WriteST, ReadAdrBase
+
+// Store register, immed pre-index
+// NOTE: Handled by WriteST
+
+// Store pair, immed post-index, W-form
+// Store pair, immed post-indx, X-form
+// Store pair, immed pre-index, W-form
+// Store pair, immed pre-index, X-form
+// NOTE: Handled by WriteSTP.
+
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURBi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURBBi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURDi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURHi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURHHi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURQi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURSi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURWi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURXi)>;
+
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRBi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRHi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRWi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRXi)>;
+
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPDi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPQi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPXi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPWi)>;
+
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPDi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPQi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPXi)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPWi)>;
+
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRBui)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRDui)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRHui)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRQui)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRXui)>;
+def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRWui)>;
+
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STPXpre, STPXpost)>;
+
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instrs STRXpre, STRXpost)>;
+
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instrs STRXroW, STRXroX)>;
+
+// ASIMD Load instructions, 4 cycle access + 2 cycle NEON access
+// ASIMD load, 1 element, multiple, 1 reg, D-form 1uOps
+// ASIMD load, 1 element, multiple, 1 reg, Q-form 1uOps
+def : InstRW<[ORYONWrite_5Cyc_LD],
+ (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_LD_I012345],
+ (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, D-form 3 uOps
+// ASIMD load, 1 element, multiple, 2 reg, Q-form 2 uOps
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD],
+ (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_2Uops_LD],
+ (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
+ (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345],
+ (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, D-form 4 uOps
+// ASIMD load, 1 element, multiple, 3 reg, Q-form 3 uOps
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD],
+ (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD],
+ (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
+ (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
+ (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, D-form 6 uOps
+// ASIMD load, 1 element, multiple, 4 reg, Q-form 4 uOps
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD],
+ (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD],
+ (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
+ (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
+ (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, one lane, B/H/S 2uOps
+// ASIMD load, 1 element, one lane, D 2UOps
+def : InstRW<[ORYONWrite_5Cyc_2Uops_LD], (instregex "^LD1i(8|16|32|64)$")>;
+def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345],
+ (instregex "^LD1i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, D-form, B/H/S 2uOps
+// ASIMD load, 1 element, all lanes, D-form, D 2uOps
+// ASIMD load, 1 element, all lanes, Q-form 2uOps
+def : InstRW<[ORYONWrite_5Cyc_2Uops_LD],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, multiple, D-form, B/H/S 3 uOps
+// ASIMD load, 2 element, multiple, Q-form, D 4 uOps
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD],
+ (instregex "^LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD],
+ (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
+ (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
+ (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, one lane, B/H 3 uOps
+// ASIMD load, 2 element, one lane, S 3 uOps
+// ASIMD load, 2 element, one lane, D 3 uOps
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], (instregex "^LD2i(8|16|32|64)$")>;
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
+ (instregex "^LD2i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, D-form, B/H/S 3 uOps
+// ASIMD load, 2 element, all lanes, D-form, D 3 uOps
+// ASIMD load, 2 element, all lanes, Q-form 3 uOps
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD],
+ (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
+ (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple, D-form, B/H/S 5 uOps
+// ASIMD load, 3 element, multiple, Q-form, B/H/S 6 uOps
+// ASIMD load, 3 element, multiple, Q-form, D 6 uOps
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD],
+ (instregex "^LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD],
+ (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
+ (instregex "^LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
+ (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, one lane, B/H 4 uOps
+// ASIMD load, 3 element, one lane, S 4 uOps
+// ASIMD load, 3 element, one lane, D 5 uOps
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], (instregex "^LD3i(8|16|32)$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], (instregex "^LD3i(64)$")>;
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
+ (instregex "^LD3i(8|16|32)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
+ (instregex "^LD3i(64)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, D-form, B/H/S 4 uOps
+// ASIMD load, 3 element, all lanes, D-form, D 5 uOps
+// ASIMD load, 3 element, all lanes, Q-form, B/H/S 4 uOps
+// ASIMD load, 3 element, all lanes, Q-form, D 5 uOps
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD],
+ (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD],
+ (instregex "^LD3Rv(1d|2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
+ (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
+ (instregex "^LD3Rv(1d|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, D-form, B/H/S 6 uOps
+// ASIMD load, 4 element, multiple, Q-form, B/H/S 10 uOps
+// ASIMD load, 4 element, multiple, Q-form, D 8 uOps
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD],
+ (instregex "^LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[ORYONWrite_5Cyc_10Uops_LD],
+ (instregex "^LD4Fourv(16b|8h|4s)$")>;
+def : InstRW<[ORYONWrite_5Cyc_8Uops_LD],
+ (instregex "^LD4Fourv(2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
+ (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_10Uops_LD_I012345],
+ (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_8Uops_LD_I012345],
+ (instregex "^LD4Fourv(2d)_POST$")>;
+
+// ASIMD load, 4 element, one lane, B/H 5 uOps
+// ASIMD load, 4 element, one lane, S 5 uOps
+// ASIMD load, 4 element, one lane, D 6 uOps
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], (instregex "^LD4i(8|16|32)$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], (instregex "^LD4i(64)$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
+ (instregex "^LD4i(8|16|32)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
+ (instregex "^LD4i(64)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, D-form, B/H/S 5 uOps
+// ASIMD load, 4 element, all lanes, D-form, D 6 uOps
+// ASIMD load, 4 element, all lanes, Q-form, B/H/S 5 uOps
+// ASIMD load, 4 element, all lanes, Q-form, D 6 uOps
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD],
+ (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD],
+ (instregex "^LD4Rv(1d|2d)$")>;
+def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
+ (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s)_POST$")>;
+def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
+ (instregex "^LD4Rv(1d|2d)_POST$")>;
+
+// ASIMD Store Instructions
+// ASIMD store, 1 element, multiple, 1 reg, D-form 1 uOps
+// ASIMD store, 1 element, multiple, 1 reg, Q-form 1 uops
+def : InstRW<[ORYONWrite_1Cyc_ST],
+ (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
+ (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, D-form 2 uOps
+// ASIMD store, 1 element, multiple, 2 reg, Q-form 2 uOps
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST],
+ (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
+ (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, D-form 3 uOps
+// ASIMD store, 1 element, multiple, 3 reg, Q-form 3 uOps
+def : InstRW<[ORYONWrite_1Cyc_3Uops_ST],
+ (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_3Uops_ST_I012345],
+ (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, D-form 4 uOps
+// ASIMD store, 1 element, multiple, 4 reg, Q-form 4 uOps
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST],
+ (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
+ (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, one lane, B/H/S 2 uOps
+// ASIMD store, 1 element, one lane, D 2 uOps
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST],
+ (instregex "^ST1i(8|16|32|64)$")>;
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
+ (instregex "^ST1i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 2 element, multiple, D-form, B/H/S 2 uOps
+// ASIMD store, 2 element, multiple, Q-form, B/H/S 4 uOps
+// ASIMD store, 2 element, multiple, Q-form, D 4 uOps
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST],
+ (instregex "^ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST],
+ (instregex "^ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
+ (instregex "^ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
+ (instregex "^ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 2 element, one lane, B/H/S 2 uOps
+// ASIMD store, 2 element, one lane, D 2 uOps
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST],
+ (instregex "^ST2i(8|16|32|64)$")>;
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
+ (instregex "^ST2i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 3 element, multiple, D-form, B/H/S 4 uOps
+// ASIMD store, 3 element, multiple, Q-form, B/H/S 6 uOps
+// ASIMD store, 3 element, multiple, Q-form, D 6 uOps
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST],
+ (instregex "^ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[ORYONWrite_1Cyc_6Uops_ST],
+ (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
+ (instregex "^ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[ORYONWrite_1Cyc_6Uops_ST_I012345],
+ (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 3 element, one lane, B/H 2 uOps
+// ASIMD store, 3 element, one lane, S 2 uOps
+// ASIMD store, 3 element, one lane, D 4 uOps
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], (instregex "^ST3i(8|16|32)$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], (instregex "^ST3i(64)$")>;
+def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
+ (instregex "^ST3i(8|16|32)_POST$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
+ (instregex "^ST3i(64)_POST$")>;
+
+
+// ASIMD store, 4 element, multiple, D-form, B/H/S 5 uOps
+// ASIMD store, 4 element, multiple, Q-form, B/H/S 10 uOps
+// ASIMD store, 4 element, multiple, Q-form, D 8 uOps
+def : InstRW<[ORYONWrite_1Cyc_5Uops_ST],
+ (instregex "^ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[ORYONWrite_1Cyc_10Uops_ST],
+ (instregex "^ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[ORYONWrite_1Cyc_8Uops_ST],
+ (instregex "^ST4Fourv(2d)$")>;
+def : InstRW<[ORYONWrite_1Cyc_5Uops_ST_I012345],
+ (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[ORYONWrite_1Cyc_10Uops_ST_I012345],
+ (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[ORYONWrite_1Cyc_8Uops_ST_I012345],
+ (instregex "^ST4Fourv(2d)_POST$")>;
+
+// ASIMD store, 4 element, one lane, B/H 3 uOps
+// ASIMD store, 4 element, one lane, S 3 uOps
+// ASIMD store, 4 element, one lane, D 4 uOps
+def : InstRW<[ORYONWrite_1Cyc_3Uops_ST], (instregex "^ST4i(8|16|32)$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], (instregex "^ST4i(64)$")>;
+def : InstRW<[ORYONWrite_1Cyc_3Uops_ST_I012345],
+ (instregex "^ST4i(8|16|32)_POST$")>;
+def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
+ (instregex "^ST4i(64)_POST$")>;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Tables in VXU
+//===----------------------------------------------------------------------===//
+// all uOps are not clearly written in the VXU document
+
+// I2V
+def : InstRW<[ORYONWrite_I2V_4Cyc_I45], (instregex "^FMOV[HSD][WX]r", "^FMOVDXHighr")>;
+
+// I2V with convert
+def : InstRW<[ORYONWrite_I2V_7Cyc_I45], (instregex "^[SU]CVTF[SU][XW][HSD]ri")>;
+
+// V2I
+def : InstRW<[ORYONWrite_V2I_3Cyc_FP01], (instregex "^FMOV[WX][HSD]r", "FMOVXDHighr")>;
+
+// V2I with convert 2nd [SU] necessary?
+def : InstRW<[ORYONWrite_V2I_6Cyc_FP01], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
+
+// float to float move immediate, row 7 in big chart
+def : InstRW<[ORYONWrite_V2V_2Cyc_FP0123], (instregex "^FMOV[HSD]r")>;
+def : InstRW<[ORYONWrite_V2V_2Cyc_FP0123], (instregex "^FMOV[HSD]i")>;
+
+// float to float conversion within VXU, precision conversion
+def : InstRW<[ORYONWrite_V2V_6Cyc_FP01], (instregex "^FJCVTZS")>;
+def : InstRW<[ORYONWrite_V2V_3Cyc_FP0123], (instregex "^FCVT[HSD][HSD]r",
+ "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
+
+// floating comparison write to NZCV
+def : InstRW<[ORYONWrite_2Cyc_FP01], (instregex "^FCMP(E)?[HSD]r[ir]")>;
+def : InstRW<[ORYONWrite_2Cyc_FP01], (instregex "^FCCMP(E)?[HSD]rr")>;
+
+// floating point conditional select
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FCSEL")>;
+
+// floating multiply-add
+def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^(F|FN)MADD", "^(F|FN)MSUB")>;
+
+// floating unary, cycle/throughput? xls row14
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^F(ABS|NEG)[SD]r")>;
+
+//floating division/square root
+def : InstRW<[ORYONWrite_7Cyc_FP3], (instregex "^FDIVHrr")>;
+def : InstRW<[ORYONWrite_8Cyc_FP3], (instregex "^FDIVSrr")>;
+def : InstRW<[ORYONWrite_10Cyc_FP3], (instregex "^FDIVDrr")>;
+
+def : InstRW<[ORYONWrite_8Cyc_FP3_RC], (instregex "^FSQRTHr")>;
+def : InstRW<[ORYONWrite_10Cyc_FP3_RC], (instregex "^FSQRTSr")>;
+def : InstRW<[ORYONWrite_13Cyc_FP3_RC], (instregex "^FSQRTDr")>;
+
+//==========
+// SIMD move instructions
+//==========
+
+// ASIMD DUP element
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^DUPv.+lane")>;
+// ASIMD DUP general throughput undecided, 3? FP0123
+// VXU doc, p42, 2 uOps
+def : InstRW<[ORYONWrite_3Cyc_2Uops_FP0123], (instregex "^DUPv.+gpr")>;
+
+// ASIMD insert, element to element
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^INSv.+lane")>;
+// ASIMD insert, gen reg 3? FP0123?
+def : InstRW<[ORYONWrite_3Cyc_2Uops_FP0123], (instregex "^INSv.+gpr")>;
+
+// ASIMD move, FP immed
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FMOVv")>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^[SU]MOVv")>;
+
+//==========
+// SIMD arithmetic instructions
+//==========
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDv", "^SUBv",
+ "^BIFv", "^BITv", "^BSLv",
+ "^ANDv", "^BICv", "^EORv",
+ "^ORRv", "^ORNv")>;
+
+
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^FABDv", "^FADDv", "^FSUBv")>;
+
+// floating division
+def : InstRW<[ORYONWrite_6Cyc_FP3], (instregex "^FDIVv.*16$")>;
+def : InstRW<[ORYONWrite_7Cyc_FP3], (instregex "^FDIVv.*32$")>;
+def : InstRW<[ORYONWrite_9Cyc_FP3], (instregex "^FDIVv.*64$")>;
+
+def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMUL(X)?v",
+ "^FRECPSv", "^FRSQRTSv")>;
+
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^MLAv","^MLSv", "^MULv",
+ "^PMULv", "UABAv")>;
+
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "SABAv", "SABDv",
+ "^(SH|UH)(ADD|SUB)v",
+ "^S(MAX|MIN)v",
+ "^(SQ|UQ)(ADD|SUB)v",
+ "^(SQ|SQR|UQ|UQR)SHLv",
+ "^(SR|UR)HADDv",
+ "^(SR|UR)SHLv",
+ "^UABDv",
+ "^U(MAX|MIN)v")>;
+// IMAX or UMAX in the above line
+//==========
+// SIMD compare instructions
+//==========
+
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^CMEQv","^CMGEv","^CMGTv",
+ "^CMLEv","^CMLTv", "^CMHIv",
+ "^CMHSv",
+ "^FCMEQv", "^FCMGEv",
+ "^FCMGTv", "^FCMLEv",
+ "^FCMLTv",
+ "^FACGEv", "^FACGTv")>;
+
+//==========
+// SIMD widening and narrowing arithmetic instructions
+//==========
+// NO need to list ADDHN2, RADDHN2, RSUBHN2 as they are not distinguished
+// from ADDHN, RADDHN, RSUBHN in td file(v16i8, v8i16, v4i32).
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDHNv",
+ "^SUBHNv",
+ "^RADDHNv",
+ "^RSUBHNv",
+ "^SABD(L|L2)v", "^UABD(L|L2)v",
+ "^(S|U)(ADD|SUB)(L|L2|W|W2)v")>;
+
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^PMUL(L|L2)v","^SABA(L|L2)v",
+ "^(S|U|SQ)(MLA|MSL|MUL)(L|L2)v")>;
+
+//==========
+// SIMD unary arithmetic instructions
+//==========
+//^MVNv is an alias of ^NOTv
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ABSv", "^CLSv","^CLZv", "^CNTv",
+ "^NEGv", "^NOTv",
+ "^RBITv", "^REV(16|32|64)v",
+ "^SQ(ABS|NEG)v", "^SQ(XT|XTU)(N|N2)v",
+ "^(SU|US)QADDv",
+ "^UQXT(N|N2)v", "^XTN2?v")>;
+
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^FCVT(L|L2|N|N2|XN|XN2)v",
+ "^FRINT[AIMNPXZ]v",
+ "^FRSQRTEv",
+ "^(S|U)ADALPv",
+ "^(S|U)ADDLPv")>;
+
+
+def : InstRW<[ORYONWrite_3Cyc_FP0], (instregex "^URECPEv", "^URSQRTEv",
+ "^FRECPEv", "^FRECPXv")>;
+
+def : InstRW<[ORYONWrite_8Cyc_FP3_RC], (instregex "^FSQRTv.*16$")>;
+def : InstRW<[ORYONWrite_10Cyc_FP3_RC], (instregex "^FSQRTv.*32$")>;
+def : InstRW<[ORYONWrite_13Cyc_FP3_RC], (instregex "^FSQRTv.*64$")>;
+
+//==========
+// SIMD binary element arithmetic instructions
+//==========
+
+def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMLAv", "^FMLSv")>;
+
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^SQDMULHv",
+ "^SQRD(MLA|MLS|MUL)Hv")>;
+
+//==========
+// SIMD permute instructions
+//==========
+
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^EXTv", "^TRN(1|2)v",
+ "^UZP(1|2)v", "^ZIP(1|2)v")>;
+
+//==========
+// SIMD immediate instructions
+//==========
+
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^MOVIv", "^MVNIv")>;
+
+//==========
+// SIMD shift(immediate) instructions
+//==========
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^RSHR(N|N2)v", "^SHLv",
+ "^(SHL|SHR)(N|N2)v",
+ "^SLIv",
+ "^(SQ|SQR)SHR(U)?(N|N2)v",
+ "^(UQ|UQR)SHR(N|N2)v",
+ "^SQSHLUv",
+ "^SRIv",
+ "^(S|SR|U|UR)SHRv",
+ "^(S|SR|U|UR)SRAv",
+ "^(S|U)SHL(L|L2)v")>;
+
+//==========
+// SIMD floating-point and integer conversion instructions
+//==========
+// same as above conversion
+
+//==========
+// SIMD reduce (across vector lanes) instructions
+//==========
+
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDVv",
+ "^(FMAX|FMIN)(V|NMV)v",
+ "^(S|U)ADDLVv",
+ "^(S|U)(MAX|MIN)Vv")>;
+//==========
+// SIMD pairwise arithmetic instructions
+//==========
+
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDPv", "^FADDPv",
+ "^(FMAX|FMIN)(NMP|P)v",
+ "^(S|U)(MIN|MAX)Pv")>;
+//==========
+// SIMD dot product instructions
+//==========
+
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^(U|S)DOTv")>;
+
+//==========
+// SIMD table lookup instructions
+//==========
+// TBL 1-reg/2-reg; TBX 1-reg, 1uOp, throughput=4 latency=2
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instrs TBLv8i8One, TBLv16i8One,
+ TBXv8i8One, TBXv16i8One,
+ TBLv8i8Two, TBLv16i8Two)>;
+
+// TBL 3-reg/4-reg, 3 uOps, throughput=4/3=1.33 latency=4
+def : InstRW<[ORYONWrite_4Cyc_FP0123_FP0123_FP0123_RC],
+ (instrs TBLv8i8Three, TBLv16i8Three,
+ TBLv8i8Four, TBLv16i8Four)>;
+
+
+// TBX 2-reg 2 uOps, throughput=2 latency=4
+def : InstRW<[ORYONWrite_4Cyc_FP0123_FP0123_RC], (instrs TBXv8i8Two, TBXv16i8Two)>;
+
+// TBX 3-reg/4-reg, 4uOps, throughput=1, latency=6
+def : InstRW<[ORYONWrite_6Cyc_FP0123_FP0123_FP0123_FP0123_RC],
+ (instrs TBXv8i8Three, TBXv16i8Three,
+ TBXv8i8Four, TBXv16i8Four)>;
+
+
+//==========
+// SIMD complex number arithmetic instructions
+//==========
+
+def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FCADDv", "^FCMLAv")>;
+
+//==========
+// SIMD cryptographic instructions
+//==========
+// 3,4 on IMLA, CRYP
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^AES[DE]",
+ "^SM3(TT1|TT2)(A|B)")>;
+
+// 2,4 on CRYP
+def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^AESI?MC",
+ "^EOR3",
+ "^RAX1",
+ "^XAR",
+ "^BCAX",
+ "^SM3SS1",
+ "^SM3PART(W1|W2)")>;
+// 5,1 on CRYP
+def : InstRW<[ORYONWrite_5Cyc_FP1], (instregex "^SM4E",
+ "^SM4EKEY")>;
+
+// 2,1 on CRYP
+def : InstRW<[ORYONWrite_2Cyc_FP1], (instregex "^SHA1(H|SU0|SU1)",
+ "^SHA256SU0",
+ "^SHA512(SU0|SU1)")>;
+
+// 3,1 on CRYP
+def : InstRW<[ORYONWrite_3Cyc_FP1], (instregex "^SHA256SU1",
+ "^SHA512(H|H2)")>;
+
+// 4,0.25 on CRYP
+def : InstRW<[ORYONWrite_4Cyc_FP1_RC4], (instregex "^SHA1(C|P|M)",
+ "^SHA256(H|H2)")>;
+
+//==========
+// SIMD v8.6 instructions
+//==========
+// 4,2 on IMLA
+def : InstRW<[ORYONWrite_4Cyc_FP0123_RC], (instregex "^(S|U|US)MMLA$")>;
+
+// 4,0.5 on IMLA
+def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], (instregex "^BFMMLA$")>;
+
+// 4,0.5 on IMLA
+def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], (instregex "^BFMLAL(B|T)")>;
+
+// 3,4
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^(US|SU)DOTv")>;
+
+// 3,1
+def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^BF(16)?DOTv")>;
+
+// 3,4
+def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^BFCVT(N|N2)?$")>;
+
+
+} // SchedModel = OryonModel
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 8bc26eeef34d..93ea729e2550 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -299,6 +299,13 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
PrefLoopAlignment = Align(64);
MaxInterleaveFactor = 4;
break;
+ case Oryon:
+ CacheLineSize = 64;
+ PrefFunctionAlignment = Align(16);
+ MaxInterleaveFactor = 4;
+ PrefetchDistance = 128;
+ MinPrefetchStride = 1024;
+ break;
}
if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index f49c73dc7951..9f5756fc7e40 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -58,6 +58,9 @@ static cl::opt<unsigned> InlineCallPenaltyChangeSM(
static cl::opt<bool> EnableOrLikeSelectOpt("enable-aarch64-or-like-select",
cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableLSRCostOpt("enable-aarch64-lsr-cost-opt",
+ cl::init(true), cl::Hidden);
+
namespace {
class TailFoldingOption {
// These bitfields will only ever be set to something non-zero in operator=,
@@ -4216,3 +4219,19 @@ bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(const Instruction *I) {
return true;
return BaseT::shouldTreatInstructionLikeSelect(I);
}
+
+bool AArch64TTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2) {
+ // AArch64 specific here is adding the number of instructions to the
+ // comparison (though not as the first consideration, as some targets do)
+ // along with changing the priority of the base additions.
+ // TODO: Maybe a more nuanced tradeoff between instruction count
+ // and number of registers? To be investigated at a later date.
+ if (EnableLSRCostOpt)
+ return std::tie(C1.NumRegs, C1.Insns, C1.NumBaseAdds, C1.AddRecCost,
+ C1.NumIVMuls, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
+ std::tie(C2.NumRegs, C2.Insns, C2.NumBaseAdds, C2.AddRecCost,
+ C2.NumIVMuls, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
+
+ return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
+} \ No newline at end of file
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 2f44aaa3e26a..feec1a4289c3 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -425,6 +425,9 @@ public:
}
std::optional<unsigned> getMinPageSize() const { return 4096; }
+
+ bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 8e302786c746..d0d7a9dc1724 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1534,6 +1534,12 @@ def FeatureISAVersion11_5_1 : FeatureSet<
FeatureVGPRSingleUseHintInsts,
Feature1_5xVGPRs])>;
+def FeatureISAVersion11_5_2 : FeatureSet<
+ !listconcat(FeatureISAVersion11_Common.Features,
+ [FeatureSALUFloatInsts,
+ FeatureDPPSrc1SGPR,
+ FeatureVGPRSingleUseHintInsts])>;
+
def FeatureISAVersion12 : FeatureSet<
[FeatureGFX12,
FeatureLDSBankCount32,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 625ac0230f16..2bdbf4151dd9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -1017,7 +1017,7 @@ public:
//
// TODO: We could filter out subgraphs that do not access LDS globals.
for (Function *F : KernelsThatAllocateTableLDS)
- removeFnAttrFromReachable(CG, F, "amdgpu-no-lds-kernel-id");
+ removeFnAttrFromReachable(CG, F, {"amdgpu-no-lds-kernel-id"});
}
DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 9c94ca1e4708..17c961578382 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -57,6 +57,7 @@
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Scalar.h"
@@ -992,6 +993,10 @@ void AMDGPUPassConfig::addIRPasses() {
if (isPassEnabled(EnableImageIntrinsicOptimizer))
addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));
+ // This can be disabled by passing ::Disable here or on the command line
+ // with --expand-variadics-override=disable.
+ addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
+
// Function calls are not supported, so make sure we inline everything.
addPass(createAMDGPUAlwaysInlinePass());
addPass(createAlwaysInlinerLegacyPass());
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 2ada981a77cd..d218ffeb1fec 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -295,7 +295,11 @@ def : ProcessorModel<"gfx1151", GFX11SpeedModel,
FeatureISAVersion11_5_1.Features
>;
-// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151]
+def : ProcessorModel<"gfx1152", GFX11SpeedModel,
+ FeatureISAVersion11_5_2.Features
+>;
+
+// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152]
def : ProcessorModel<"gfx11-generic", GFX11SpeedModel,
FeatureISAVersion11_Generic.Features
>;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index d7d6e00d2389..e805e964ffe4 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -113,6 +113,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
@@ -196,6 +197,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
+ case GK_GFX1152: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152;
case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index c47eea20563d..8b42d4a1dee7 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -2052,9 +2052,6 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
MemInfoMap &Visited,
SmallPtrSet<MachineInstr *, 4> &AnchorList) const {
- if (!(MI.mayLoad() ^ MI.mayStore()))
- return false;
-
if (!STM->hasFlatInstOffsets() || !SIInstrInfo::isFLAT(MI))
return false;
@@ -2065,10 +2062,6 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
unsigned AS = SIInstrInfo::isFLATGlobal(MI) ? AMDGPUAS::GLOBAL_ADDRESS
: AMDGPUAS::FLAT_ADDRESS;
- if (MI.mayLoad() &&
- TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != nullptr)
- return false;
-
if (AnchorList.count(&MI))
return false;
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index f178324dbbe2..5dc3457b5bfa 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -103,8 +103,6 @@ private:
MachineBasicBlock *emitEndCf(MachineInstr &MI);
- void lowerInitExec(MachineBasicBlock *MBB, MachineInstr &MI);
-
void findMaskOperands(MachineInstr &MI, unsigned OpNo,
SmallVectorImpl<MachineOperand> &Src) const;
@@ -709,95 +707,6 @@ MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
return SplitBB;
}
-void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
- MachineInstr &MI) {
- MachineFunction &MF = *MBB->getParent();
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- bool IsWave32 = ST.isWave32();
-
- if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
- // This should be before all vector instructions.
- MachineInstr *InitMI = BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
- TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec)
- .addImm(MI.getOperand(0).getImm());
- if (LIS) {
- LIS->RemoveMachineInstrFromMaps(MI);
- LIS->InsertMachineInstrInMaps(*InitMI);
- }
- MI.eraseFromParent();
- return;
- }
-
- // Extract the thread count from an SGPR input and set EXEC accordingly.
- // Since BFM can't shift by 64, handle that case with CMP + CMOV.
- //
- // S_BFE_U32 count, input, {shift, 7}
- // S_BFM_B64 exec, count, 0
- // S_CMP_EQ_U32 count, 64
- // S_CMOV_B64 exec, -1
- Register InputReg = MI.getOperand(0).getReg();
- MachineInstr *FirstMI = &*MBB->begin();
- if (InputReg.isVirtual()) {
- MachineInstr *DefInstr = MRI->getVRegDef(InputReg);
- assert(DefInstr && DefInstr->isCopy());
- if (DefInstr->getParent() == MBB) {
- if (DefInstr != FirstMI) {
- // If the `InputReg` is defined in current block, we also need to
- // move that instruction to the beginning of the block.
- DefInstr->removeFromParent();
- MBB->insert(FirstMI, DefInstr);
- if (LIS)
- LIS->handleMove(*DefInstr);
- } else {
- // If first instruction is definition then move pointer after it.
- FirstMI = &*std::next(FirstMI->getIterator());
- }
- }
- }
-
- // Insert instruction sequence at block beginning (before vector operations).
- const DebugLoc DL = MI.getDebugLoc();
- const unsigned WavefrontSize = ST.getWavefrontSize();
- const unsigned Mask = (WavefrontSize << 1) - 1;
- Register CountReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- auto BfeMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_BFE_U32), CountReg)
- .addReg(InputReg)
- .addImm((MI.getOperand(1).getImm() & Mask) | 0x70000);
- if (LV)
- LV->recomputeForSingleDefVirtReg(InputReg);
- auto BfmMI =
- BuildMI(*MBB, FirstMI, DL,
- TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec)
- .addReg(CountReg)
- .addImm(0);
- auto CmpMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_CMP_EQ_U32))
- .addReg(CountReg, RegState::Kill)
- .addImm(WavefrontSize);
- if (LV)
- LV->getVarInfo(CountReg).Kills.push_back(CmpMI);
- auto CmovMI =
- BuildMI(*MBB, FirstMI, DL,
- TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
- Exec)
- .addImm(-1);
-
- if (!LIS) {
- MI.eraseFromParent();
- return;
- }
-
- LIS->RemoveMachineInstrFromMaps(MI);
- MI.eraseFromParent();
-
- LIS->InsertMachineInstrInMaps(*BfeMI);
- LIS->InsertMachineInstrInMaps(*BfmMI);
- LIS->InsertMachineInstrInMaps(*CmpMI);
- LIS->InsertMachineInstrInMaps(*CmovMI);
-
- RecomputeRegs.insert(InputReg);
- LIS->createAndComputeVirtRegInterval(CountReg);
-}
-
bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
for (auto &I : MBB.instrs()) {
if (!I.isDebugInstr() && !I.isUnconditionalBranch())
@@ -927,18 +836,6 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
SplitMBB = process(MI);
Changed = true;
break;
-
- // FIXME: find a better place for this
- case AMDGPU::SI_INIT_EXEC:
- case AMDGPU::SI_INIT_EXEC_FROM_INPUT:
- lowerInitExec(MBB, MI);
- if (LIS)
- LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
- Changed = true;
- break;
-
- default:
- break;
}
if (SplitMBB != MBB) {
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 09dc1c781e2f..5b4c44302fa6 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -177,6 +177,7 @@ private:
SmallVector<MachineInstr *, 4> LowerToMovInstrs;
SmallVector<MachineInstr *, 4> LowerToCopyInstrs;
SmallVector<MachineInstr *, 4> KillInstrs;
+ SmallVector<MachineInstr *, 4> InitExecInstrs;
void printInfo();
@@ -223,6 +224,8 @@ private:
void lowerLiveMaskQueries();
void lowerCopyInstrs();
void lowerKillInstrs(bool IsWQM);
+ void lowerInitExec(MachineInstr &MI);
+ void lowerInitExecInstrs();
public:
static char ID;
@@ -580,6 +583,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
Opcode == AMDGPU::SI_DEMOTE_I1) {
KillInstrs.push_back(&MI);
BBI.NeedsLowering = true;
+ } else if (Opcode == AMDGPU::SI_INIT_EXEC ||
+ Opcode == AMDGPU::SI_INIT_EXEC_FROM_INPUT) {
+ InitExecInstrs.push_back(&MI);
} else if (WQMOutputs) {
// The function is in machine SSA form, which means that physical
// VGPRs correspond to shader inputs and outputs. Inputs are
@@ -1556,6 +1562,97 @@ void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
}
}
+void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {
+ MachineBasicBlock *MBB = MI.getParent();
+ bool IsWave32 = ST->isWave32();
+
+ if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
+ // This should be before all vector instructions.
+ MachineInstr *InitMI =
+ BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
+ TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64),
+ Exec)
+ .addImm(MI.getOperand(0).getImm());
+ if (LIS) {
+ LIS->RemoveMachineInstrFromMaps(MI);
+ LIS->InsertMachineInstrInMaps(*InitMI);
+ }
+ MI.eraseFromParent();
+ return;
+ }
+
+ // Extract the thread count from an SGPR input and set EXEC accordingly.
+ // Since BFM can't shift by 64, handle that case with CMP + CMOV.
+ //
+ // S_BFE_U32 count, input, {shift, 7}
+ // S_BFM_B64 exec, count, 0
+ // S_CMP_EQ_U32 count, 64
+ // S_CMOV_B64 exec, -1
+ Register InputReg = MI.getOperand(0).getReg();
+ MachineInstr *FirstMI = &*MBB->begin();
+ if (InputReg.isVirtual()) {
+ MachineInstr *DefInstr = MRI->getVRegDef(InputReg);
+ assert(DefInstr && DefInstr->isCopy());
+ if (DefInstr->getParent() == MBB) {
+ if (DefInstr != FirstMI) {
+ // If the `InputReg` is defined in current block, we also need to
+ // move that instruction to the beginning of the block.
+ DefInstr->removeFromParent();
+ MBB->insert(FirstMI, DefInstr);
+ if (LIS)
+ LIS->handleMove(*DefInstr);
+ } else {
+ // If first instruction is definition then move pointer after it.
+ FirstMI = &*std::next(FirstMI->getIterator());
+ }
+ }
+ }
+
+ // Insert instruction sequence at block beginning (before vector operations).
+ const DebugLoc DL = MI.getDebugLoc();
+ const unsigned WavefrontSize = ST->getWavefrontSize();
+ const unsigned Mask = (WavefrontSize << 1) - 1;
+ Register CountReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ auto BfeMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_BFE_U32), CountReg)
+ .addReg(InputReg)
+ .addImm((MI.getOperand(1).getImm() & Mask) | 0x70000);
+ auto BfmMI =
+ BuildMI(*MBB, FirstMI, DL,
+ TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec)
+ .addReg(CountReg)
+ .addImm(0);
+ auto CmpMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_CMP_EQ_U32))
+ .addReg(CountReg, RegState::Kill)
+ .addImm(WavefrontSize);
+ auto CmovMI =
+ BuildMI(*MBB, FirstMI, DL,
+ TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
+ Exec)
+ .addImm(-1);
+
+ if (!LIS) {
+ MI.eraseFromParent();
+ return;
+ }
+
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+
+ LIS->InsertMachineInstrInMaps(*BfeMI);
+ LIS->InsertMachineInstrInMaps(*BfmMI);
+ LIS->InsertMachineInstrInMaps(*CmpMI);
+ LIS->InsertMachineInstrInMaps(*CmovMI);
+
+ LIS->removeInterval(InputReg);
+ LIS->createAndComputeVirtRegInterval(InputReg);
+ LIS->createAndComputeVirtRegInterval(CountReg);
+}
+
+void SIWholeQuadMode::lowerInitExecInstrs() {
+ for (MachineInstr *MI : InitExecInstrs)
+ lowerInitExec(*MI);
+}
+
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "SI Whole Quad Mode on " << MF.getName()
<< " ------------- \n");
@@ -1567,6 +1664,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
LowerToCopyInstrs.clear();
LowerToMovInstrs.clear();
KillInstrs.clear();
+ InitExecInstrs.clear();
StateTransition.clear();
ST = &MF.getSubtarget<GCNSubtarget>();
@@ -1606,10 +1704,13 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
// Shader is simple does not need any state changes or any complex lowering
if (!(GlobalFlags & (StateWQM | StateStrict)) && LowerToCopyInstrs.empty() &&
LowerToMovInstrs.empty() && KillInstrs.empty()) {
+ lowerInitExecInstrs();
lowerLiveMaskQueries();
- return !LiveMaskQueries.empty();
+ return !InitExecInstrs.empty() || !LiveMaskQueries.empty();
}
+ lowerInitExecInstrs();
+
MachineBasicBlock &Entry = MF.front();
MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI();
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 239e0ee70572..04c6e940e6ed 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -235,8 +235,9 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {
}
void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
- StringRef FnAttr) {
- KernelRoot->removeFnAttr(FnAttr);
+ ArrayRef<StringRef> FnAttrs) {
+ for (StringRef Attr : FnAttrs)
+ KernelRoot->removeFnAttr(Attr);
SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};
SmallPtrSet<Function *, 8> Visited;
@@ -261,12 +262,15 @@ void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
Function *PotentialCallee =
ExternalCallRecord.second->getFunction();
assert(PotentialCallee);
- if (!isKernelLDS(PotentialCallee))
- PotentialCallee->removeFnAttr(FnAttr);
+ if (!isKernelLDS(PotentialCallee)) {
+ for (StringRef Attr : FnAttrs)
+ PotentialCallee->removeFnAttr(Attr);
+ }
}
}
} else {
- Callee->removeFnAttr(FnAttr);
+ for (StringRef Attr : FnAttrs)
+ Callee->removeFnAttr(Attr);
if (Visited.insert(Callee).second)
WorkList.push_back(Callee);
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
index 4d3ad328e131..e1cd4d03052b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -54,7 +55,7 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M);
/// Strip FnAttr attribute from any functions where we may have
/// introduced its use.
void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
- StringRef FnAttr);
+ ArrayRef<StringRef> FnAttrs);
/// Given a \p Def clobbering a load from \p Ptr according to the MSSA check
/// if this is actually a memory update or an artificial clobber to facilitate
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index a46c383115e2..919828753f45 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -115,6 +115,12 @@ static bool shouldInspect(MachineInstr &MI) {
return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI);
}
+static bool isHorizontalReduction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ uint64_t Flags = MCID.TSFlags;
+ return (Flags & ARMII::HorizontalReduction) != 0;
+}
+
namespace {
using InstSet = SmallPtrSetImpl<MachineInstr *>;
@@ -275,6 +281,16 @@ namespace {
if (VPT->getOpcode() == ARM::MVE_VPST)
return false;
+ // If the VPT block does not define something that is an "output", then
+ // the tail-predicated version will just perform a subset of the original
+ // vpt block, where the last lanes should not be used.
+ if (isVPTOpcode(VPT->getOpcode()) &&
+ all_of(Block.getInsts(), [](const MachineInstr *MI) {
+ return !MI->mayStore() && !MI->mayLoad() &&
+ !isHorizontalReduction(*MI) && !isVCTP(MI);
+ }))
+ return true;
+
auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) {
MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx));
return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);
@@ -813,12 +829,6 @@ static bool producesDoubleWidthResult(const MachineInstr &MI) {
return (Flags & ARMII::DoubleWidthResult) != 0;
}
-static bool isHorizontalReduction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- uint64_t Flags = MCID.TSFlags;
- return (Flags & ARMII::HorizontalReduction) != 0;
-}
-
// Can this instruction generate a non-zero result when given only zeroed
// operands? This allows us to know that, given operands with false bytes
// zeroed by masked loads, that the result will also contain zeros in those
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 8b2e8480e29f..caa3760c301f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -171,6 +171,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
// Set operations for 'F' feature.
if (Subtarget.hasBasicF()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
@@ -186,6 +188,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
if (Subtarget.is64Bit())
setOperationAction(ISD::FRINT, MVT::f32, Legal);
@@ -202,7 +206,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
// Set operations for 'D' feature.
if (Subtarget.hasBasicD()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
@@ -219,6 +225,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
if (Subtarget.is64Bit())
setOperationAction(ISD::FRINT, MVT::f64, Legal);
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 83466d53f84d..c29c1b593321 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -46,7 +46,7 @@ static cl::opt<bool>
static std::string computeDataLayout(const Triple &TT) {
if (TT.isArch64Bit())
- return "e-m:e-p:64:64-i64:64-i128:128-n64-S128";
+ return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
assert(TT.isArch32Bit() && "only LA32 and LA64 are currently supported");
return "e-m:e-p:32:32-i64:64-n32-S128";
}
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 5eefab59a6ab..b0cb24c63c3c 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -40,7 +40,7 @@ FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMLegacyPass();
ModulePass *createNVPTXCtorDtorLoweringLegacyPass();
-FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
+FunctionPass *createNVVMIntrRangePass();
FunctionPass *createNVVMReflectPass(unsigned int SmVersion);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
@@ -53,12 +53,7 @@ MachineFunctionPass *createNVPTXPeephole();
MachineFunctionPass *createNVPTXProxyRegErasurePass();
struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
- NVVMIntrRangePass();
- NVVMIntrRangePass(unsigned SmVersion) : SmVersion(SmVersion) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-
-private:
- unsigned SmVersion;
};
struct NVVMReflectPass : PassInfoMixin<NVVMReflectPass> {
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index f63697916d90..82770f866085 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -542,30 +542,24 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
// If the NVVM IR has some of reqntid* specified, then output
// the reqntid directive, and set the unspecified ones to 1.
// If none of Reqntid* is specified, don't output reqntid directive.
- unsigned Reqntidx, Reqntidy, Reqntidz;
- Reqntidx = Reqntidy = Reqntidz = 1;
- bool ReqSpecified = false;
- ReqSpecified |= getReqNTIDx(F, Reqntidx);
- ReqSpecified |= getReqNTIDy(F, Reqntidy);
- ReqSpecified |= getReqNTIDz(F, Reqntidz);
+ std::optional<unsigned> Reqntidx = getReqNTIDx(F);
+ std::optional<unsigned> Reqntidy = getReqNTIDy(F);
+ std::optional<unsigned> Reqntidz = getReqNTIDz(F);
- if (ReqSpecified)
- O << ".reqntid " << Reqntidx << ", " << Reqntidy << ", " << Reqntidz
- << "\n";
+ if (Reqntidx || Reqntidy || Reqntidz)
+ O << ".reqntid " << Reqntidx.value_or(1) << ", " << Reqntidy.value_or(1)
+ << ", " << Reqntidz.value_or(1) << "\n";
// If the NVVM IR has some of maxntid* specified, then output
// the maxntid directive, and set the unspecified ones to 1.
// If none of maxntid* is specified, don't output maxntid directive.
- unsigned Maxntidx, Maxntidy, Maxntidz;
- Maxntidx = Maxntidy = Maxntidz = 1;
- bool MaxSpecified = false;
- MaxSpecified |= getMaxNTIDx(F, Maxntidx);
- MaxSpecified |= getMaxNTIDy(F, Maxntidy);
- MaxSpecified |= getMaxNTIDz(F, Maxntidz);
-
- if (MaxSpecified)
- O << ".maxntid " << Maxntidx << ", " << Maxntidy << ", " << Maxntidz
- << "\n";
+ std::optional<unsigned> Maxntidx = getMaxNTIDx(F);
+ std::optional<unsigned> Maxntidy = getMaxNTIDy(F);
+ std::optional<unsigned> Maxntidz = getMaxNTIDz(F);
+
+ if (Maxntidx || Maxntidy || Maxntidz)
+ O << ".maxntid " << Maxntidx.value_or(1) << ", " << Maxntidy.value_or(1)
+ << ", " << Maxntidz.value_or(1) << "\n";
unsigned Mincta = 0;
if (getMinCTASm(F, Mincta))
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 4dc3cea4bd8e..b60a1d747af7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -233,9 +233,9 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(
[this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
- // FIXME: NVVMIntrRangePass is causing numerical discrepancies,
- // investigate and re-enable.
- // FPM.addPass(NVVMIntrRangePass(Subtarget.getSmVersion()));
+ // Note: NVVMIntrRangePass was causing numerical discrepancies at one
+ // point; if issues crop up, consider disabling.
+ FPM.addPass(NVVMIntrRangePass());
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
});
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 013afe916e86..3a536db1c972 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -128,6 +128,14 @@ bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
return true;
}
+static std::optional<unsigned>
+findOneNVVMAnnotation(const GlobalValue &GV, const std::string &PropName) {
+ unsigned RetVal;
+ if (findOneNVVMAnnotation(&GV, PropName, RetVal))
+ return RetVal;
+ return std::nullopt;
+}
+
bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
std::vector<unsigned> &retval) {
auto &AC = getAnnotationCache();
@@ -252,32 +260,57 @@ std::string getSamplerName(const Value &val) {
return std::string(val.getName());
}
-bool getMaxNTIDx(const Function &F, unsigned &x) {
- return findOneNVVMAnnotation(&F, "maxntidx", x);
+std::optional<unsigned> getMaxNTIDx(const Function &F) {
+ return findOneNVVMAnnotation(F, "maxntidx");
}
-bool getMaxNTIDy(const Function &F, unsigned &y) {
- return findOneNVVMAnnotation(&F, "maxntidy", y);
+std::optional<unsigned> getMaxNTIDy(const Function &F) {
+ return findOneNVVMAnnotation(F, "maxntidy");
}
-bool getMaxNTIDz(const Function &F, unsigned &z) {
- return findOneNVVMAnnotation(&F, "maxntidz", z);
+std::optional<unsigned> getMaxNTIDz(const Function &F) {
+ return findOneNVVMAnnotation(F, "maxntidz");
+}
+
+std::optional<unsigned> getMaxNTID(const Function &F) {
+ // Note: The semantics here are a bit strange. The PTX ISA states the
+ // following (11.4.2. Performance-Tuning Directives: .maxntid):
+ //
+ // Note that this directive guarantees that the total number of threads does
+ // not exceed the maximum, but does not guarantee that the limit in any
+ // particular dimension is not exceeded.
+ std::optional<unsigned> MaxNTIDx = getMaxNTIDx(F);
+ std::optional<unsigned> MaxNTIDy = getMaxNTIDy(F);
+ std::optional<unsigned> MaxNTIDz = getMaxNTIDz(F);
+ if (MaxNTIDx || MaxNTIDy || MaxNTIDz)
+ return MaxNTIDx.value_or(1) * MaxNTIDy.value_or(1) * MaxNTIDz.value_or(1);
+ return std::nullopt;
}
bool getMaxClusterRank(const Function &F, unsigned &x) {
return findOneNVVMAnnotation(&F, "maxclusterrank", x);
}
-bool getReqNTIDx(const Function &F, unsigned &x) {
- return findOneNVVMAnnotation(&F, "reqntidx", x);
+std::optional<unsigned> getReqNTIDx(const Function &F) {
+ return findOneNVVMAnnotation(F, "reqntidx");
+}
+
+std::optional<unsigned> getReqNTIDy(const Function &F) {
+ return findOneNVVMAnnotation(F, "reqntidy");
}
-bool getReqNTIDy(const Function &F, unsigned &y) {
- return findOneNVVMAnnotation(&F, "reqntidy", y);
+std::optional<unsigned> getReqNTIDz(const Function &F) {
+ return findOneNVVMAnnotation(F, "reqntidz");
}
-bool getReqNTIDz(const Function &F, unsigned &z) {
- return findOneNVVMAnnotation(&F, "reqntidz", z);
+std::optional<unsigned> getReqNTID(const Function &F) {
+ // Note: The semantics here are a bit strange. See getMaxNTID.
+ std::optional<unsigned> ReqNTIDx = getReqNTIDx(F);
+ std::optional<unsigned> ReqNTIDy = getReqNTIDy(F);
+ std::optional<unsigned> ReqNTIDz = getReqNTIDz(F);
+ if (ReqNTIDx || ReqNTIDy || ReqNTIDz)
+ return ReqNTIDx.value_or(1) * ReqNTIDy.value_or(1) * ReqNTIDz.value_or(1);
+ return std::nullopt;
}
bool getMinCTASm(const Function &F, unsigned &x) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index 2872db9fa213..e020bc0f02e9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -48,13 +48,15 @@ std::string getTextureName(const Value &);
std::string getSurfaceName(const Value &);
std::string getSamplerName(const Value &);
-bool getMaxNTIDx(const Function &, unsigned &);
-bool getMaxNTIDy(const Function &, unsigned &);
-bool getMaxNTIDz(const Function &, unsigned &);
-
-bool getReqNTIDx(const Function &, unsigned &);
-bool getReqNTIDy(const Function &, unsigned &);
-bool getReqNTIDz(const Function &, unsigned &);
+std::optional<unsigned> getMaxNTIDx(const Function &);
+std::optional<unsigned> getMaxNTIDy(const Function &);
+std::optional<unsigned> getMaxNTIDz(const Function &);
+std::optional<unsigned> getMaxNTID(const Function &F);
+
+std::optional<unsigned> getReqNTIDx(const Function &);
+std::optional<unsigned> getReqNTIDy(const Function &);
+std::optional<unsigned> getReqNTIDz(const Function &);
+std::optional<unsigned> getReqNTID(const Function &);
bool getMaxClusterRank(const Function &, unsigned &);
bool getMinCTASm(const Function &, unsigned &);
diff --git a/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp b/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
index 5381646434eb..f9d21b38a7ec 100644
--- a/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
@@ -1,4 +1,4 @@
-//===- NVVMIntrRange.cpp - Set !range metadata for NVVM intrinsics --------===//
+//===- NVVMIntrRange.cpp - Set range attributes for NVVM intrinsics -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,19 +6,21 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass adds appropriate !range metadata for calls to NVVM
+// This pass adds appropriate range attributes for calls to NVVM
// intrinsics that return a limited range of values.
//
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
-#include "llvm/IR/Constants.h"
+#include "NVPTXUtilities.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
+#include <cstdint>
using namespace llvm;
@@ -26,31 +28,20 @@ using namespace llvm;
namespace llvm { void initializeNVVMIntrRangePass(PassRegistry &); }
-// Add !range metadata based on limits of given SM variant.
-static cl::opt<unsigned> NVVMIntrRangeSM("nvvm-intr-range-sm", cl::init(20),
- cl::Hidden, cl::desc("SM variant"));
-
namespace {
class NVVMIntrRange : public FunctionPass {
- private:
- unsigned SmVersion;
-
- public:
- static char ID;
- NVVMIntrRange() : NVVMIntrRange(NVVMIntrRangeSM) {}
- NVVMIntrRange(unsigned int SmVersion)
- : FunctionPass(ID), SmVersion(SmVersion) {
+public:
+ static char ID;
+ NVVMIntrRange() : FunctionPass(ID) {
- initializeNVVMIntrRangePass(*PassRegistry::getPassRegistry());
- }
+ initializeNVVMIntrRangePass(*PassRegistry::getPassRegistry());
+ }
- bool runOnFunction(Function &) override;
+ bool runOnFunction(Function &) override;
};
-}
+} // namespace
-FunctionPass *llvm::createNVVMIntrRangePass(unsigned int SmVersion) {
- return new NVVMIntrRange(SmVersion);
-}
+FunctionPass *llvm::createNVVMIntrRangePass() { return new NVVMIntrRange(); }
char NVVMIntrRange::ID = 0;
INITIALIZE_PASS(NVVMIntrRange, "nvvm-intr-range",
@@ -58,112 +49,110 @@ INITIALIZE_PASS(NVVMIntrRange, "nvvm-intr-range",
// Adds the passed-in [Low,High) range information as metadata to the
// passed-in call instruction.
-static bool addRangeMetadata(uint64_t Low, uint64_t High, CallInst *C) {
- // This call already has range metadata, nothing to do.
- if (C->getMetadata(LLVMContext::MD_range))
+static bool addRangeAttr(uint64_t Low, uint64_t High, IntrinsicInst *II) {
+ if (II->getMetadata(LLVMContext::MD_range))
return false;
- LLVMContext &Context = C->getParent()->getContext();
- IntegerType *Int32Ty = Type::getInt32Ty(Context);
- Metadata *LowAndHigh[] = {
- ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Low)),
- ConstantAsMetadata::get(ConstantInt::get(Int32Ty, High))};
- C->setMetadata(LLVMContext::MD_range, MDNode::get(Context, LowAndHigh));
+ const uint64_t BitWidth = II->getType()->getIntegerBitWidth();
+ ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High));
+
+ if (auto CurrentRange = II->getRange())
+ Range = Range.intersectWith(CurrentRange.value());
+
+ II->addRangeRetAttr(Range);
return true;
}
-static bool runNVVMIntrRange(Function &F, unsigned SmVersion) {
+static bool runNVVMIntrRange(Function &F) {
struct {
unsigned x, y, z;
} MaxBlockSize, MaxGridSize;
- MaxBlockSize.x = 1024;
- MaxBlockSize.y = 1024;
- MaxBlockSize.z = 64;
- MaxGridSize.x = SmVersion >= 30 ? 0x7fffffff : 0xffff;
+ const unsigned MetadataNTID = getReqNTID(F).value_or(
+ getMaxNTID(F).value_or(std::numeric_limits<unsigned>::max()));
+
+ MaxBlockSize.x = std::min(1024u, MetadataNTID);
+ MaxBlockSize.y = std::min(1024u, MetadataNTID);
+ MaxBlockSize.z = std::min(64u, MetadataNTID);
+
+ MaxGridSize.x = 0x7fffffff;
MaxGridSize.y = 0xffff;
MaxGridSize.z = 0xffff;
// Go through the calls in this function.
bool Changed = false;
for (Instruction &I : instructions(F)) {
- CallInst *Call = dyn_cast<CallInst>(&I);
- if (!Call)
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+ if (!II)
continue;
- if (Function *Callee = Call->getCalledFunction()) {
- switch (Callee->getIntrinsicID()) {
- // Index within block
- case Intrinsic::nvvm_read_ptx_sreg_tid_x:
- Changed |= addRangeMetadata(0, MaxBlockSize.x, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_tid_y:
- Changed |= addRangeMetadata(0, MaxBlockSize.y, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_tid_z:
- Changed |= addRangeMetadata(0, MaxBlockSize.z, Call);
- break;
-
- // Block size
- case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
- Changed |= addRangeMetadata(1, MaxBlockSize.x+1, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
- Changed |= addRangeMetadata(1, MaxBlockSize.y+1, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
- Changed |= addRangeMetadata(1, MaxBlockSize.z+1, Call);
- break;
-
- // Index within grid
- case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
- Changed |= addRangeMetadata(0, MaxGridSize.x, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
- Changed |= addRangeMetadata(0, MaxGridSize.y, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
- Changed |= addRangeMetadata(0, MaxGridSize.z, Call);
- break;
-
- // Grid size
- case Intrinsic::nvvm_read_ptx_sreg_nctaid_x:
- Changed |= addRangeMetadata(1, MaxGridSize.x+1, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_nctaid_y:
- Changed |= addRangeMetadata(1, MaxGridSize.y+1, Call);
- break;
- case Intrinsic::nvvm_read_ptx_sreg_nctaid_z:
- Changed |= addRangeMetadata(1, MaxGridSize.z+1, Call);
- break;
-
- // warp size is constant 32.
- case Intrinsic::nvvm_read_ptx_sreg_warpsize:
- Changed |= addRangeMetadata(32, 32+1, Call);
- break;
-
- // Lane ID is [0..warpsize)
- case Intrinsic::nvvm_read_ptx_sreg_laneid:
- Changed |= addRangeMetadata(0, 32, Call);
- break;
-
- default:
- break;
- }
+ switch (II->getIntrinsicID()) {
+ // Index within block
+ case Intrinsic::nvvm_read_ptx_sreg_tid_x:
+ Changed |= addRangeAttr(0, MaxBlockSize.x, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_tid_y:
+ Changed |= addRangeAttr(0, MaxBlockSize.y, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_tid_z:
+ Changed |= addRangeAttr(0, MaxBlockSize.z, II);
+ break;
+
+ // Block size
+ case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
+ Changed |= addRangeAttr(1, MaxBlockSize.x + 1, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
+ Changed |= addRangeAttr(1, MaxBlockSize.y + 1, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
+ Changed |= addRangeAttr(1, MaxBlockSize.z + 1, II);
+ break;
+
+ // Index within grid
+ case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
+ Changed |= addRangeAttr(0, MaxGridSize.x, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
+ Changed |= addRangeAttr(0, MaxGridSize.y, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
+ Changed |= addRangeAttr(0, MaxGridSize.z, II);
+ break;
+
+ // Grid size
+ case Intrinsic::nvvm_read_ptx_sreg_nctaid_x:
+ Changed |= addRangeAttr(1, MaxGridSize.x + 1, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_nctaid_y:
+ Changed |= addRangeAttr(1, MaxGridSize.y + 1, II);
+ break;
+ case Intrinsic::nvvm_read_ptx_sreg_nctaid_z:
+ Changed |= addRangeAttr(1, MaxGridSize.z + 1, II);
+ break;
+
+ // warp size is constant 32.
+ case Intrinsic::nvvm_read_ptx_sreg_warpsize:
+ Changed |= addRangeAttr(32, 32 + 1, II);
+ break;
+
+ // Lane ID is [0..warpsize)
+ case Intrinsic::nvvm_read_ptx_sreg_laneid:
+ Changed |= addRangeAttr(0, 32, II);
+ break;
+
+ default:
+ break;
}
}
return Changed;
}
-bool NVVMIntrRange::runOnFunction(Function &F) {
- return runNVVMIntrRange(F, SmVersion);
-}
-
-NVVMIntrRangePass::NVVMIntrRangePass() : NVVMIntrRangePass(NVVMIntrRangeSM) {}
+bool NVVMIntrRange::runOnFunction(Function &F) { return runNVVMIntrRange(F); }
PreservedAnalyses NVVMIntrRangePass::run(Function &F,
FunctionAnalysisManager &AM) {
- return runNVVMIntrRange(F, SmVersion) ? PreservedAnalyses::none()
- : PreservedAnalyses::all();
+ return runNVVMIntrRange(F) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
}
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index f4e84ade3b5a..bc0ae7a32c05 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1079,13 +1079,13 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
assert(IsAIX && TM.getCodeModel() == CodeModel::Small &&
"PseudoOp only valid for small code model AIX");
- // Transform %rN = ADDItoc/8 @op1, %r2.
+ // Transform %rN = ADDItoc/8 %r2, @op1.
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to load address.
TmpInst.setOpcode((!IsPPC64) ? (PPC::LA) : (PPC::LA8));
- const MachineOperand &MO = MI->getOperand(1);
+ const MachineOperand &MO = MI->getOperand(2);
assert(MO.isGlobal() && "Invalid operand for ADDItoc[8].");
// Map the operand to its corresponding MCSymbol.
@@ -1094,7 +1094,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_None, OutContext);
- TmpInst.getOperand(1) = TmpInst.getOperand(2);
TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 735050641adf..a07954bd0d8b 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -2080,13 +2080,15 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
cast<GlobalVariable>(GV)->hasAttribute("toc-data");
// For small code model, generate a simple TOC load.
- if (CModel == CodeModel::Small)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
- IsAIXTocData ? TII.get(PPC::ADDItoc8) : TII.get(PPC::LDtoc),
- DestReg)
- .addGlobalAddress(GV)
- .addReg(PPC::X2);
- else {
+ if (CModel == CodeModel::Small) {
+ auto MIB = BuildMI(
+ *FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
+ IsAIXTocData ? TII.get(PPC::ADDItoc8) : TII.get(PPC::LDtoc), DestReg);
+ if (IsAIXTocData)
+ MIB.addReg(PPC::X2).addGlobalAddress(GV);
+ else
+ MIB.addGlobalAddress(GV).addReg(PPC::X2);
+ } else {
// If the address is an externally defined symbol, a symbol with common
// or externally available linkage, a non-local function address, or a
// jump table address (not yet needed), or if we are generating code
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 275b3337a276..1a69d1e89313 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -6102,8 +6102,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
EVT OperandTy) {
SDValue GA = TocEntry->getOperand(0);
SDValue TocBase = TocEntry->getOperand(1);
- SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
- transferMemOperands(TocEntry, MN);
+ SDNode *MN = nullptr;
+ if (OpCode == PPC::ADDItoc || OpCode == PPC::ADDItoc8)
+ // toc-data access doesn't involve loading from the GOT, so there is
+ // no need to keep memory operands.
+ MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, TocBase, GA);
+ else {
+ MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
+ transferMemOperands(TocEntry, MN);
+ }
ReplaceNode(TocEntry, MN);
};
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 9af8ada78376..eda5eb975e70 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1485,11 +1485,9 @@ def ADDItocL8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:
}
// Local Data Transform
-def ADDItoc8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
+def ADDItoc8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDItoc8",
- [(set i64:$rD,
- (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
-
+ []>, isPPC64;
let mayLoad = 1 in
def LDtocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
"#LDtocL", []>, isPPC64;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index df6b2bf1a7b7..09f829943528 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3345,10 +3345,8 @@ def LWZtocL : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc_nor
def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentry32:$disp),
"#ADDIStocHA", []>;
// TOC Data Transform on AIX
-def ADDItoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
- "#ADDItoc",
- [(set i32:$rD,
- (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+def ADDItoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tocentry32:$disp),
+ "#ADDItoc", []>;
def ADDItocL : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentry32:$disp),
"#ADDItocL", []>;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4051279fdbf8..8ace5d79af07 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1102,15 +1102,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::EXTRACT_SUBVECTOR},
VT, Custom);
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
- if (Subtarget.hasStdExtZfbfmin()) {
- if (Subtarget.hasVInstructionsF16())
- setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
- else if (Subtarget.hasVInstructionsF16Minimal())
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
- }
- setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
- Custom);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
// TODO: Promote to fp32.
}
}
@@ -1340,15 +1331,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::EXTRACT_SUBVECTOR},
VT, Custom);
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
- if (Subtarget.hasStdExtZfbfmin()) {
- if (Subtarget.hasVInstructionsF16())
- setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
- else if (Subtarget.hasVInstructionsF16Minimal())
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
- }
- setOperationAction(
- {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
- Custom);
// TODO: Promote to fp32.
continue;
}
@@ -6722,16 +6704,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::SPLAT_VECTOR:
- if ((Op.getValueType().getScalarType() == MVT::f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- Subtarget.hasStdExtZfhminOrZhinxmin() &&
- !Subtarget.hasVInstructionsF16())) ||
- (Op.getValueType().getScalarType() == MVT::bf16 &&
- (Subtarget.hasVInstructionsBF16() && Subtarget.hasStdExtZfbfmin() &&
- Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))) {
- if (Op.getValueType() == MVT::nxv32f16 ||
- Op.getValueType() == MVT::nxv32bf16)
+ if (Op.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ if (Op.getValueType() == MVT::nxv32f16)
return SplitVectorOp(Op, DAG);
SDLoc DL(Op);
SDValue NewScalar =
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index a96768240a93..82358cdd45ed 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -932,11 +932,11 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
"Can't handle X0, X0 vsetvli yet");
if (AVLReg == RISCV::X0)
NewInfo.setAVLVLMAX();
- else if (VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS))
- NewInfo.setAVLRegDef(VNI, AVLReg);
- else {
- assert(MI.getOperand(1).isUndef());
+ else if (MI.getOperand(1).isUndef())
NewInfo.setAVLIgnored();
+ else {
+ VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
+ NewInfo.setAVLRegDef(VNI, AVLReg);
}
}
NewInfo.setVTYPE(MI.getOperand(2).getImm());
@@ -1008,11 +1008,11 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
}
else
InstrInfo.setAVLImm(Imm);
- } else if (VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS)) {
- InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
- } else {
- assert(VLOp.isUndef());
+ } else if (VLOp.isUndef()) {
InstrInfo.setAVLIgnored();
+ } else {
+ VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
+ InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
}
} else {
assert(isScalarExtractInstr(MI));
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index fe4d839e4fdc..b0949f5fc1d7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -382,20 +382,7 @@ class GetIntVTypeInfo<VTypeInfo vti> {
// Equivalent integer vector type. Eg.
// VI8M1 → VI8M1 (identity)
// VF64M4 → VI64M4
- VTypeInfo Vti = !cast<VTypeInfo>(!subst("VBF", "VI",
- !subst("VF", "VI",
- !cast<string>(vti))));
-}
-
-// This functor is used to obtain the fp vector type that has the same SEW and
-// multiplier as the input parameter type.
-class GetFpVTypeInfo<VTypeInfo vti> {
- // Equivalent integer vector type. Eg.
- // VF16M1 → VF16M1 (identity)
- // VBF16M1 → VF16M1
- VTypeInfo Vti = !cast<VTypeInfo>(!subst("VBF", "VF",
- !subst("VI", "VF",
- !cast<string>(vti))));
+ VTypeInfo Vti = !cast<VTypeInfo>(!subst("VF", "VI", !cast<string>(vti)));
}
class MTypeInfo<ValueType Mas, LMULInfo M, string Bx> {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 497c4aadf753..3163e4bafd4b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1394,7 +1394,7 @@ defm : VPatFPSetCCSDNode_VV_VF_FV<SETOLE, "PseudoVMFLE", "PseudoVMFGE">;
// Floating-point vselects:
// 11.15. Vector Integer Merge Instructions
// 13.15. Vector Floating-Point Merge Instruction
-foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
+foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
let Predicates = GetVTypePredicates<ivti>.Predicates in {
def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1,
@@ -1412,9 +1412,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
}
-
- let Predicates = !listconcat(GetVTypePredicates<GetFpVTypeInfo<fvti>.Vti>.Predicates,
- GetVTypeScalarPredicates<fvti>.Predicates) in
+ let Predicates = GetVTypePredicates<fvti>.Predicates in
def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
(SplatFPOp fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rs2)),
@@ -1477,7 +1475,7 @@ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
//===----------------------------------------------------------------------===//
foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
- let Predicates = !listconcat(GetVTypePredicates<GetFpVTypeInfo<fvti>.Vti>.Predicates,
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
GetVTypeScalarPredicates<fvti>.Predicates) in
def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl undef, fvti.ScalarRegClass:$rs1, srcvalue)),
(!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 70d8265e7be4..ce8133a5a297 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2604,7 +2604,7 @@ foreach vti = AllFloatVectors in {
}
}
-foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
+foreach fvti = AllFloatVectors in {
// Floating-point vselects:
// 11.15. Vector Integer Merge Instructions
// 13.15. Vector Floating-Point Merge Instruction
@@ -2639,8 +2639,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
GPR:$vl, fvti.Log2SEW)>;
}
- let Predicates = !listconcat(GetVTypePredicates<GetFpVTypeInfo<fvti>.Vti>.Predicates,
- GetVTypeScalarPredicates<fvti>.Predicates) in {
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0),
(SplatFPOp fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rs2,
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 956b851fce6c..49838e685a6d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -1459,11 +1459,22 @@ static bool generateImageSizeQueryInst(const SPIRV::IncomingCall *Call,
Component == 3 ? NumActualRetComponents - 1 : Component;
assert(ExtractedComposite < NumActualRetComponents &&
"Invalid composite index!");
+ Register TypeReg = GR->getSPIRVTypeID(Call->ReturnType);
+ SPIRVType *NewType = nullptr;
+ if (QueryResultType->getOpcode() == SPIRV::OpTypeVector) {
+ Register NewTypeReg = QueryResultType->getOperand(1).getReg();
+ if (TypeReg != NewTypeReg &&
+ (NewType = GR->getSPIRVTypeForVReg(NewTypeReg)) != nullptr)
+ TypeReg = NewTypeReg;
+ }
MIRBuilder.buildInstr(SPIRV::OpCompositeExtract)
.addDef(Call->ReturnRegister)
- .addUse(GR->getSPIRVTypeID(Call->ReturnType))
+ .addUse(TypeReg)
.addUse(QueryResult)
.addImm(ExtractedComposite);
+ if (NewType != nullptr)
+ insertAssignInstr(Call->ReturnRegister, nullptr, NewType, GR, MIRBuilder,
+ MIRBuilder.getMF().getRegInfo());
} else {
// More than 1 component is expected, fill a new vector.
auto MIB = MIRBuilder.buildInstr(SPIRV::OpVectorShuffle)
@@ -2063,16 +2074,30 @@ static bool generateAsyncCopy(const SPIRV::IncomingCall *Call,
auto Scope = buildConstantIntReg(SPIRV::Scope::Workgroup, MIRBuilder, GR);
switch (Opcode) {
- case SPIRV::OpGroupAsyncCopy:
- return MIRBuilder.buildInstr(Opcode)
- .addDef(Call->ReturnRegister)
- .addUse(GR->getSPIRVTypeID(Call->ReturnType))
- .addUse(Scope)
- .addUse(Call->Arguments[0])
- .addUse(Call->Arguments[1])
- .addUse(Call->Arguments[2])
- .addUse(buildConstantIntReg(1, MIRBuilder, GR))
- .addUse(Call->Arguments[3]);
+ case SPIRV::OpGroupAsyncCopy: {
+ SPIRVType *NewType =
+ Call->ReturnType->getOpcode() == SPIRV::OpTypeEvent
+ ? nullptr
+ : GR->getOrCreateSPIRVTypeByName("spirv.Event", MIRBuilder);
+ Register TypeReg = GR->getSPIRVTypeID(NewType ? NewType : Call->ReturnType);
+ unsigned NumArgs = Call->Arguments.size();
+ Register EventReg = Call->Arguments[NumArgs - 1];
+ bool Res = MIRBuilder.buildInstr(Opcode)
+ .addDef(Call->ReturnRegister)
+ .addUse(TypeReg)
+ .addUse(Scope)
+ .addUse(Call->Arguments[0])
+ .addUse(Call->Arguments[1])
+ .addUse(Call->Arguments[2])
+ .addUse(Call->Arguments.size() > 4
+ ? Call->Arguments[3]
+ : buildConstantIntReg(1, MIRBuilder, GR))
+ .addUse(EventReg);
+ if (NewType != nullptr)
+ insertAssignInstr(Call->ReturnRegister, nullptr, NewType, GR, MIRBuilder,
+ MIRBuilder.getMF().getRegInfo());
+ return Res;
+ }
case SPIRV::OpGroupWaitEvents:
return MIRBuilder.buildInstr(Opcode)
.addUse(Scope)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 24c6c2688642..edc9e1a33d9f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -586,6 +586,7 @@ defm : DemangledNativeBuiltin<"__spirv_SpecConstantComposite", OpenCL_std, SpecC
// Async Copy and Prefetch builtin records:
defm : DemangledNativeBuiltin<"async_work_group_copy", OpenCL_std, AsyncCopy, 4, 4, OpGroupAsyncCopy>;
+defm : DemangledNativeBuiltin<"async_work_group_strided_copy", OpenCL_std, AsyncCopy, 5, 5, OpGroupAsyncCopy>;
defm : DemangledNativeBuiltin<"__spirv_GroupAsyncCopy", OpenCL_std, AsyncCopy, 6, 6, OpGroupAsyncCopy>;
defm : DemangledNativeBuiltin<"wait_group_events", OpenCL_std, AsyncCopy, 2, 2, OpGroupWaitEvents>;
defm : DemangledNativeBuiltin<"__spirv_GroupWaitEvents", OpenCL_std, AsyncCopy, 3, 3, OpGroupWaitEvents>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 5ef0be1cab72..bbd25dc85f52 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -61,9 +61,6 @@ class SPIRVEmitIntrinsics
DenseMap<Instruction *, Type *> AggrConstTypes;
DenseSet<Instruction *> AggrStores;
- // a registry of created Intrinsic::spv_assign_ptr_type instructions
- DenseMap<Value *, CallInst *> AssignPtrTypeInstr;
-
// deduce element type of untyped pointers
Type *deduceElementType(Value *I);
Type *deduceElementTypeHelper(Value *I);
@@ -98,14 +95,16 @@ class SPIRVEmitIntrinsics
return B.CreateIntrinsic(IntrID, {Types}, Args);
}
+ void buildAssignType(IRBuilder<> &B, Type *ElemTy, Value *Arg);
void buildAssignPtr(IRBuilder<> &B, Type *ElemTy, Value *Arg);
+ void updateAssignType(CallInst *AssignCI, Value *Arg, Value *OfType);
void replaceMemInstrUses(Instruction *Old, Instruction *New, IRBuilder<> &B);
void processInstrAfterVisit(Instruction *I, IRBuilder<> &B);
void insertAssignPtrTypeIntrs(Instruction *I, IRBuilder<> &B);
void insertAssignTypeIntrs(Instruction *I, IRBuilder<> &B);
- void insertAssignTypeInstrForTargetExtTypes(TargetExtType *AssignedType,
- Value *V, IRBuilder<> &B);
+ void insertAssignPtrTypeTargetExt(TargetExtType *AssignedType, Value *V,
+ IRBuilder<> &B);
void replacePointerOperandWithPtrCast(Instruction *I, Value *Pointer,
Type *ExpectedElementType,
unsigned OperandToReplace,
@@ -218,15 +217,39 @@ static inline void reportFatalOnTokenType(const Instruction *I) {
false);
}
+void SPIRVEmitIntrinsics::buildAssignType(IRBuilder<> &B, Type *Ty,
+ Value *Arg) {
+ Value *OfType = PoisonValue::get(Ty);
+ CallInst *AssignCI = buildIntrWithMD(Intrinsic::spv_assign_type,
+ {Arg->getType()}, OfType, Arg, {}, B);
+ GR->addAssignPtrTypeInstr(Arg, AssignCI);
+}
+
void SPIRVEmitIntrinsics::buildAssignPtr(IRBuilder<> &B, Type *ElemTy,
Value *Arg) {
- CallInst *AssignPtrTyCI =
- buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {Arg->getType()},
- Constant::getNullValue(ElemTy), Arg,
- {B.getInt32(getPointerAddressSpace(Arg->getType()))}, B);
+ Value *OfType = PoisonValue::get(ElemTy);
+ CallInst *AssignPtrTyCI = buildIntrWithMD(
+ Intrinsic::spv_assign_ptr_type, {Arg->getType()}, OfType, Arg,
+ {B.getInt32(getPointerAddressSpace(Arg->getType()))}, B);
GR->addDeducedElementType(AssignPtrTyCI, ElemTy);
GR->addDeducedElementType(Arg, ElemTy);
- AssignPtrTypeInstr[Arg] = AssignPtrTyCI;
+ GR->addAssignPtrTypeInstr(Arg, AssignPtrTyCI);
+}
+
+void SPIRVEmitIntrinsics::updateAssignType(CallInst *AssignCI, Value *Arg,
+ Value *OfType) {
+ LLVMContext &Ctx = Arg->getContext();
+ AssignCI->setArgOperand(
+ 1, MetadataAsValue::get(
+ Ctx, MDNode::get(Ctx, ValueAsMetadata::getConstant(OfType))));
+ if (cast<IntrinsicInst>(AssignCI)->getIntrinsicID() !=
+ Intrinsic::spv_assign_ptr_type)
+ return;
+
+ // update association with the pointee type
+ Type *ElemTy = OfType->getType();
+ GR->addDeducedElementType(AssignCI, ElemTy);
+ GR->addDeducedElementType(Arg, ElemTy);
}
// Set element pointer type to the given value of ValueTy and tries to
@@ -513,19 +536,16 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I) {
if (!Ty) {
GR->addDeducedElementType(Op, KnownElemTy);
// check if there is existing Intrinsic::spv_assign_ptr_type instruction
- auto It = AssignPtrTypeInstr.find(Op);
- if (It == AssignPtrTypeInstr.end()) {
+ CallInst *AssignCI = GR->findAssignPtrTypeInstr(Op);
+ if (AssignCI == nullptr) {
Instruction *User = dyn_cast<Instruction>(Op->use_begin()->get());
setInsertPointSkippingPhis(B, User ? User->getNextNode() : I);
CallInst *CI =
buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {OpTy}, OpTyVal, Op,
{B.getInt32(getPointerAddressSpace(OpTy))}, B);
- AssignPtrTypeInstr[Op] = CI;
+ GR->addAssignPtrTypeInstr(Op, CI);
} else {
- It->second->setArgOperand(
- 1,
- MetadataAsValue::get(
- Ctx, MDNode::get(Ctx, ValueAsMetadata::getConstant(OpTyVal))));
+ updateAssignType(AssignCI, Op, OpTyVal);
}
} else {
if (auto *OpI = dyn_cast<Instruction>(Op)) {
@@ -559,7 +579,9 @@ void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
if (isAssignTypeInstr(U)) {
B.SetInsertPoint(U);
SmallVector<Value *, 2> Args = {New, U->getOperand(1)};
- B.CreateIntrinsic(Intrinsic::spv_assign_type, {New->getType()}, Args);
+ CallInst *AssignCI =
+ B.CreateIntrinsic(Intrinsic::spv_assign_type, {New->getType()}, Args);
+ GR->addAssignPtrTypeInstr(New, AssignCI);
U->eraseFromParent();
} else if (isMemInstrToReplace(U) || isa<ReturnInst>(U) ||
isa<CallInst>(U)) {
@@ -751,33 +773,39 @@ Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) {
return NewI;
}
-void SPIRVEmitIntrinsics::insertAssignTypeInstrForTargetExtTypes(
+void SPIRVEmitIntrinsics::insertAssignPtrTypeTargetExt(
TargetExtType *AssignedType, Value *V, IRBuilder<> &B) {
- // Do not emit spv_assign_type if the V is of the AssignedType already.
- if (V->getType() == AssignedType)
- return;
+ Type *VTy = V->getType();
- // Do not emit spv_assign_type if there is one already targetting V. If the
- // found spv_assign_type assigns a type different than AssignedType, report an
- // error. Builtin types cannot be redeclared or casted.
- for (auto User : V->users()) {
- auto *II = dyn_cast<IntrinsicInst>(User);
- if (!II || II->getIntrinsicID() != Intrinsic::spv_assign_type)
- continue;
+ // A couple of sanity checks.
+ assert(isPointerTy(VTy) && "Expect a pointer type!");
+ if (auto PType = dyn_cast<TypedPointerType>(VTy))
+ if (PType->getElementType() != AssignedType)
+ report_fatal_error("Unexpected pointer element type!");
- MetadataAsValue *VMD = cast<MetadataAsValue>(II->getOperand(1));
- Type *BuiltinType =
- dyn_cast<ConstantAsMetadata>(VMD->getMetadata())->getType();
- if (BuiltinType != AssignedType)
- report_fatal_error("Type mismatch " + BuiltinType->getTargetExtName() +
- "/" + AssignedType->getTargetExtName() +
- " for value " + V->getName(),
- false);
+ CallInst *AssignCI = GR->findAssignPtrTypeInstr(V);
+ if (!AssignCI) {
+ buildAssignType(B, AssignedType, V);
return;
}
- Constant *Const = UndefValue::get(AssignedType);
- buildIntrWithMD(Intrinsic::spv_assign_type, {V->getType()}, Const, V, {}, B);
+ Type *CurrentType =
+ dyn_cast<ConstantAsMetadata>(
+ cast<MetadataAsValue>(AssignCI->getOperand(1))->getMetadata())
+ ->getType();
+ if (CurrentType == AssignedType)
+ return;
+
+ // Builtin types cannot be redeclared or casted.
+ if (CurrentType->isTargetExtTy())
+ report_fatal_error("Type mismatch " + CurrentType->getTargetExtName() +
+ "/" + AssignedType->getTargetExtName() +
+ " for value " + V->getName(),
+ false);
+
+ // Our previous guess about the type seems to be wrong, let's update
+ // inferred type according to a new, more precise type information.
+ updateAssignType(AssignCI, V, PoisonValue::get(AssignedType));
}
void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
@@ -850,7 +878,7 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
ExpectedElementTypeConst, Pointer, {B.getInt32(AddressSpace)}, B);
GR->addDeducedElementType(CI, ExpectedElementType);
GR->addDeducedElementType(Pointer, ExpectedElementType);
- AssignPtrTypeInstr[Pointer] = CI;
+ GR->addAssignPtrTypeInstr(Pointer, CI);
return;
}
@@ -929,8 +957,7 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
for (unsigned OpIdx = 0; OpIdx < CI->arg_size(); OpIdx++) {
Value *ArgOperand = CI->getArgOperand(OpIdx);
- if (!isa<PointerType>(ArgOperand->getType()) &&
- !isa<TypedPointerType>(ArgOperand->getType()))
+ if (!isPointerTy(ArgOperand->getType()))
continue;
// Constants (nulls/undefs) are handled in insertAssignPtrTypeIntrs()
@@ -952,8 +979,8 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
continue;
if (ExpectedType->isTargetExtTy())
- insertAssignTypeInstrForTargetExtTypes(cast<TargetExtType>(ExpectedType),
- ArgOperand, B);
+ insertAssignPtrTypeTargetExt(cast<TargetExtType>(ExpectedType),
+ ArgOperand, B);
else
replacePointerOperandWithPtrCast(CI, ArgOperand, ExpectedType, OpIdx, B);
}
@@ -1145,7 +1172,7 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I,
CallInst *CI = buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {I->getType()},
EltTyConst, I, {B.getInt32(AddressSpace)}, B);
GR->addDeducedElementType(CI, ElemTy);
- AssignPtrTypeInstr[I] = CI;
+ GR->addAssignPtrTypeInstr(I, CI);
}
void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
@@ -1164,20 +1191,32 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
TypeToAssign = It->second;
}
}
- Constant *Const = UndefValue::get(TypeToAssign);
- buildIntrWithMD(Intrinsic::spv_assign_type, {Ty}, Const, I, {}, B);
+ buildAssignType(B, TypeToAssign, I);
}
for (const auto &Op : I->operands()) {
if (isa<ConstantPointerNull>(Op) || isa<UndefValue>(Op) ||
// Check GetElementPtrConstantExpr case.
(isa<ConstantExpr>(Op) && isa<GEPOperator>(Op))) {
setInsertPointSkippingPhis(B, I);
- if (isa<UndefValue>(Op) && Op->getType()->isAggregateType())
- buildIntrWithMD(Intrinsic::spv_assign_type, {B.getInt32Ty()}, Op,
- UndefValue::get(B.getInt32Ty()), {}, B);
- else if (!isa<Instruction>(Op))
- buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op, {},
- B);
+ Type *OpTy = Op->getType();
+ if (isa<UndefValue>(Op) && OpTy->isAggregateType()) {
+ CallInst *AssignCI =
+ buildIntrWithMD(Intrinsic::spv_assign_type, {B.getInt32Ty()}, Op,
+ UndefValue::get(B.getInt32Ty()), {}, B);
+ GR->addAssignPtrTypeInstr(Op, AssignCI);
+ } else if (!isa<Instruction>(Op)) {
+ Type *OpTy = Op->getType();
+ if (auto PType = dyn_cast<TypedPointerType>(OpTy)) {
+ buildAssignPtr(B, PType->getElementType(), Op);
+ } else if (isPointerTy(OpTy)) {
+ Type *ElemTy = GR->findDeducedElementType(Op);
+ buildAssignPtr(B, ElemTy ? ElemTy : deduceElementType(Op), Op);
+ } else {
+ CallInst *AssignCI = buildIntrWithMD(Intrinsic::spv_assign_type,
+ {OpTy}, Op, Op, {}, B);
+ GR->addAssignPtrTypeInstr(Op, AssignCI);
+ }
+ }
}
}
}
@@ -1368,14 +1407,12 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
continue;
insertAssignPtrTypeIntrs(I, B);
+ deduceOperandElementType(I);
insertAssignTypeIntrs(I, B);
insertPtrCastOrAssignTypeInstr(I, B);
insertSpirvDecorations(I, B);
}
- for (auto &I : instructions(Func))
- deduceOperandElementType(&I);
-
for (auto *I : Worklist) {
TrackConstants = true;
if (!I->getType()->isVoidTy() || isa<StoreInst>(I))
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index ef0973d03d15..db01f68f48de 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -73,8 +73,11 @@ class SPIRVGlobalRegistry {
// untyped pointers.
DenseMap<Value *, Type *> DeducedElTys;
// Maps composite values to deduced types where untyped pointers are replaced
- // with typed ones
+ // with typed ones.
DenseMap<Value *, Type *> DeducedNestedTys;
+ // Maps values to "assign type" calls, thus being a registry of created
+ // Intrinsic::spv_assign_ptr_type instructions.
+ DenseMap<Value *, CallInst *> AssignPtrTypeInstr;
// Add a new OpTypeXXX instruction without checking for duplicates.
SPIRVType *createSPIRVType(const Type *Type, MachineIRBuilder &MIRBuilder,
@@ -149,6 +152,17 @@ public:
return It == FunResPointerTypes.end() ? nullptr : It->second;
}
+ // A registry of "assign type" records:
+ // - Add a record.
+ void addAssignPtrTypeInstr(Value *Val, CallInst *AssignPtrTyCI) {
+ AssignPtrTypeInstr[Val] = AssignPtrTyCI;
+ }
+ // - Find a record.
+ CallInst *findAssignPtrTypeInstr(const Value *Val) {
+ auto It = AssignPtrTypeInstr.find(Val);
+ return It == AssignPtrTypeInstr.end() ? nullptr : It->second;
+ }
+
// Deduced element types of untyped pointers and composites:
// - Add a record to the map of deduced element types.
void addDeducedElementType(Value *Val, Type *Ty) { DeducedElTys[Val] = Ty; }
diff --git a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp
index 4b0e7c421df3..2744c25d1bc7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp
@@ -106,7 +106,7 @@ public:
const std::unordered_set<BasicBlock *> ToReplace,
BasicBlock *NewTarget) {
auto *T = BB->getTerminator();
- if (auto *RI = dyn_cast<ReturnInst>(T))
+ if (isa<ReturnInst>(T))
return;
if (auto *BI = dyn_cast<BranchInst>(T)) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
index 3d536085b78a..a0a253c23b1e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -417,7 +417,8 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
MachineInstr *Def = MRI.getVRegDef(Reg);
assert(Def && "Expecting an instruction that defines the register");
// G_GLOBAL_VALUE already has type info.
- if (Def->getOpcode() != TargetOpcode::G_GLOBAL_VALUE)
+ if (Def->getOpcode() != TargetOpcode::G_GLOBAL_VALUE &&
+ Def->getOpcode() != SPIRV::ASSIGN_TYPE)
insertAssignInstr(Reg, nullptr, AssignedPtrType, GR, MIB,
MF.getRegInfo());
ToErase.push_back(&MI);
@@ -427,7 +428,8 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
MachineInstr *Def = MRI.getVRegDef(Reg);
assert(Def && "Expecting an instruction that defines the register");
// G_GLOBAL_VALUE already has type info.
- if (Def->getOpcode() != TargetOpcode::G_GLOBAL_VALUE)
+ if (Def->getOpcode() != TargetOpcode::G_GLOBAL_VALUE &&
+ Def->getOpcode() != SPIRV::ASSIGN_TYPE)
insertAssignInstr(Reg, Ty, nullptr, GR, MIB, MF.getRegInfo());
ToErase.push_back(&MI);
} else if (MIOp == TargetOpcode::G_CONSTANT ||
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 8e2063121e00..f5bc584ac4e1 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -178,14 +178,15 @@ static wasm::WasmLimits DefaultLimits() {
}
static MCSymbolWasm *GetOrCreateFunctionTableSymbol(MCContext &Ctx,
- const StringRef &Name) {
+ const StringRef &Name,
+ bool is64) {
MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(Name));
if (Sym) {
if (!Sym->isFunctionTable())
Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table");
} else {
Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name));
- Sym->setFunctionTable();
+ Sym->setFunctionTable(is64);
// The default function table is synthesized by the linker.
Sym->setUndefined();
}
@@ -258,7 +259,7 @@ public:
MCAsmParserExtension::Initialize(Parser);
DefaultFunctionTable = GetOrCreateFunctionTableSymbol(
- getContext(), "__indirect_function_table");
+ getContext(), "__indirect_function_table", is64);
if (!STI->checkFeatures("+reference-types"))
DefaultFunctionTable->setOmitFromLinkingSection();
}
@@ -508,7 +509,7 @@ public:
auto &Tok = Lexer.getTok();
if (Tok.is(AsmToken::Identifier)) {
auto *Sym =
- GetOrCreateFunctionTableSymbol(getContext(), Tok.getString());
+ GetOrCreateFunctionTableSymbol(getContext(), Tok.getString(), is64);
const auto *Val = MCSymbolRefExpr::create(Sym, getContext());
*Op = std::make_unique<WebAssemblyOperand>(
WebAssemblyOperand::Symbol, Tok.getLoc(), Tok.getEndLoc(),
@@ -836,6 +837,9 @@ public:
// symbol
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
+ if (is64) {
+ Limits.Flags |= wasm::WASM_LIMITS_FLAG_IS_64;
+ }
wasm::WasmTableType Type = {*ElemType, Limits};
WasmSym->setTableType(Type);
TOut.emitTableType(WasmSym);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index 5e7279808cce..c5a047ee47d7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -108,8 +108,9 @@ MCSymbolWasm *WebAssembly::getOrCreateFunctionTableSymbol(
if (!Sym->isFunctionTable())
Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table");
} else {
+ bool is64 = Subtarget && Subtarget->getTargetTriple().isArch64Bit();
Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name));
- Sym->setFunctionTable();
+ Sym->setFunctionTable(is64);
// The default function table is synthesized by the linker.
Sym->setUndefined();
}
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 0bf3294af92a..3933e82b718f 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5120,6 +5120,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::x86_tileloaddt164_internal: {
if (!Subtarget->hasAMXTILE())
break;
+ auto *MFI =
+ CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+ MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal
? X86::PTILELOADDV
: X86::PTILELOADDT1V;
@@ -5201,6 +5204,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
}
case Intrinsic::x86_tilestored64_internal: {
+ auto *MFI =
+ CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+ MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
unsigned Opc = X86::PTILESTOREDV;
// _tile_stored_internal(row, col, buf, STRIDE, c)
SDValue Base = Node->getOperand(4);
@@ -5228,6 +5234,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::x86_tilestored64: {
if (!Subtarget->hasAMXTILE())
break;
+ auto *MFI =
+ CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+ MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Unexpected intrinsic!");
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index af1e45d25aac..3fbab3af32bb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26790,7 +26790,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
case Intrinsic::swift_async_context_addr: {
SDLoc dl(Op);
auto &MF = DAG.getMachineFunction();
- auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF)) {
MF.getFrameInfo().setFrameAddressIsTaken(true);
X86FI->setHasSwiftAsyncContext(true);
@@ -36795,7 +36795,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
case TargetOpcode::PREALLOCATED_SETUP: {
assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
- auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+ auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
MFI->setHasPreallocatedCall(true);
int64_t PreallocatedId = MI.getOperand(0).getImm();
size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
@@ -36812,7 +36812,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
int64_t PreallocatedId = MI.getOperand(1).getImm();
int64_t ArgIdx = MI.getOperand(2).getImm();
- auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+ auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
<< ", arg offset " << ArgOffset << "\n");
@@ -36855,6 +36855,13 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
unsigned Imm = MI.getOperand(0).getImm();
BuildMI(*BB, MI, MIMD, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
MI.eraseFromParent(); // The pseudo is gone now.
+ auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
+ MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
+ return BB;
+ }
+ case X86::PTILEZEROV: {
+ auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
+ MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
return BB;
}
case X86::PTILELOADD:
diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index c47bee070e04..99deacc811a1 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -74,7 +74,7 @@ let SchedRW = [WriteSystem] in {
GR16:$src2, opaquemem:$src3,
TILE:$src4), []>;
let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
- canFoldAsLoad = 1 in
+ canFoldAsLoad = 1, usesCustomInserter = 1 in
def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
[(set TILE:$dst, (int_x86_tilezero_internal
GR16:$src1, GR16:$src2))]>;
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index b69058787a4e..079ac983a8a0 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -92,6 +92,14 @@ static bool isAMXIntrinsic(Value *I) {
return false;
}
+static bool containsAMXCode(Function &F) {
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ if (I.getType()->isX86_AMXTy())
+ return true;
+ return false;
+}
+
static AllocaInst *createAllocaInstAtEntry(IRBuilder<> &Builder, BasicBlock *BB,
Type *Ty) {
Function &F = *BB->getParent();
@@ -1230,6 +1238,14 @@ public:
}
bool runOnFunction(Function &F) override {
+ // Performance optimization: most code doesn't use AMX, so return early if
+ // there are no instructions that produce AMX values. This is sufficient, as
+ // AMX arguments and constants are not allowed -- so any producer of an AMX
+ // value must be an instruction.
+ // TODO: find a cheaper way for this, without looking at all instructions.
+ if (!containsAMXCode(F))
+ return false;
+
bool C = false;
TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
TargetLibraryInfo *TLI =
diff --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
index f27676a27e86..613722b398f4 100644
--- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp
+++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
@@ -19,6 +19,7 @@
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -71,6 +72,10 @@ FunctionPass *llvm::createX86LowerTileCopyPass() {
}
bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ if (FuncInfo->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
+ return false;
+
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
const X86InstrInfo *TII = ST.getInstrInfo();
const TargetRegisterInfo *TRI = ST.getRegisterInfo();
@@ -81,26 +86,8 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
- // There won't be a tile copy if neither tile register live in nor live out.
- bool HasTileCopy = false;
- for (const auto &LI : MBB.liveins()) {
- if (TILERegs.test(LI.PhysReg)) {
- HasTileCopy = true;
- break;
- }
- }
LiveRegUnits UsedRegs(*TRI);
UsedRegs.addLiveOuts(MBB);
- if (!HasTileCopy) {
- for (auto RegT : TILERegs.set_bits()) {
- if (UsedRegs.available(RegT)) {
- HasTileCopy = true;
- break;
- }
- }
- }
- if (!HasTileCopy)
- continue;
for (MachineInstr &MI : llvm::make_early_inc_range(reverse(MBB))) {
UsedRegs.stepBackward(MI);
if (!MI.isCopy())
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index f6e853270e07..8aaa49945f9d 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -21,6 +21,8 @@
namespace llvm {
+enum AMXProgModelEnum { None = 0, DirectReg = 1, ManagedRA = 2 };
+
/// X86MachineFunctionInfo - This class is derived from MachineFunction and
/// contains private X86 target-specific information for each MachineFunction.
class X86MachineFunctionInfo : public MachineFunctionInfo {
@@ -96,6 +98,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// used to address arguments in a function using a base pointer.
int SEHFramePtrSaveIndex = 0;
+ /// The AMX programming model used in the function.
+ AMXProgModelEnum AMXProgModel = AMXProgModelEnum::None;
+
/// True if this function has a subset of CSRs that is handled explicitly via
/// copies.
bool IsSplitCSR = false;
@@ -219,6 +224,13 @@ public:
int getSEHFramePtrSaveIndex() const { return SEHFramePtrSaveIndex; }
void setSEHFramePtrSaveIndex(int Index) { SEHFramePtrSaveIndex = Index; }
+ AMXProgModelEnum getAMXProgModel() const { return AMXProgModel; }
+ void setAMXProgModel(AMXProgModelEnum Model) {
+ assert((AMXProgModel == AMXProgModelEnum::None || AMXProgModel == Model) &&
+ "mixed model is not supported");
+ AMXProgModel = Model;
+ }
+
SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
return ForwardedMustTailRegParms;
}
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index 2d296771b1c0..186d4d84c251 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -620,11 +620,11 @@ def : WriteRes<WriteNop, []>;
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : ICXWriteResPair<WriteFHAdd, [ICXPort5,ICXPort015], 6, [2,1], 3, 6>;
-defm : ICXWriteResPair<WriteFHAddY, [ICXPort5,ICXPort015], 6, [2,1], 3, 7>;
+defm : ICXWriteResPair<WriteFHAdd, [ICXPort5,ICXPort01], 6, [2,1], 3, 6>;
+defm : ICXWriteResPair<WriteFHAddY, [ICXPort5,ICXPort01], 6, [2,1], 3, 7>;
defm : ICXWriteResPair<WritePHAdd, [ICXPort5,ICXPort05], 3, [2,1], 3, 5>;
-defm : ICXWriteResPair<WritePHAddX, [ICXPort5,ICXPort015], 3, [2,1], 3, 6>;
-defm : ICXWriteResPair<WritePHAddY, [ICXPort5,ICXPort015], 3, [2,1], 3, 7>;
+defm : ICXWriteResPair<WritePHAddX, [ICXPort15,ICXPort015], 3, [2,1], 3, 6>;
+defm : ICXWriteResPair<WritePHAddY, [ICXPort15,ICXPort015], 3, [2,1], 3, 7>;
// Remaining instrs.
@@ -886,7 +886,7 @@ def ICXWriteResGroup37 : SchedWriteRes<[ICXPort0,ICXPort5]> {
}
def: InstRW<[ICXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>;
-def ICXWriteResGroup38 : SchedWriteRes<[ICXPort5,ICXPort01]> {
+def ICXWriteResGroup38 : SchedWriteRes<[ICXPort15,ICXPort01]> {
let Latency = 3;
let NumMicroOps = 3;
let ReleaseAtCycles = [2,1];
@@ -1739,13 +1739,13 @@ def ICXWriteResGroup137 : SchedWriteRes<[ICXPort23,ICXPort01]> {
def: InstRW<[ICXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
"(V?)CVTPS2PDrm")>;
-def ICXWriteResGroup143 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
+def ICXWriteResGroup143 : SchedWriteRes<[ICXPort15,ICXPort01,ICXPort23]> {
let Latency = 9;
let NumMicroOps = 4;
let ReleaseAtCycles = [2,1,1];
}
-def: InstRW<[ICXWriteResGroup143], (instregex "(V?)PHADDSWrm",
- "(V?)PHSUBSWrm")>;
+def: InstRW<[ICXWriteResGroup143], (instrs PHADDSWrm, VPHADDSWrm,
+ PHSUBSWrm, VPHSUBSWrm)>;
def ICXWriteResGroup146 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> {
let Latency = 9;
@@ -1842,7 +1842,7 @@ def: InstRW<[ICXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)",
"VPEXPANDDZ128rm(b?)",
"VPEXPANDQZ128rm(b?)")>;
-def ICXWriteResGroup154 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
+def ICXWriteResGroup154 : SchedWriteRes<[ICXPort15,ICXPort01,ICXPort23]> {
let Latency = 10;
let NumMicroOps = 4;
let ReleaseAtCycles = [2,1,1];
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index a7dff0ecbcd9..4fded44085e8 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -615,8 +615,8 @@ def : WriteRes<WriteNop, []>;
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : SKXWriteResPair<WriteFHAdd, [SKXPort5,SKXPort015], 6, [2,1], 3, 6>;
-defm : SKXWriteResPair<WriteFHAddY, [SKXPort5,SKXPort015], 6, [2,1], 3, 7>;
+defm : SKXWriteResPair<WriteFHAdd, [SKXPort5,SKXPort01], 6, [2,1], 3, 6>;
+defm : SKXWriteResPair<WriteFHAddY, [SKXPort5,SKXPort01], 6, [2,1], 3, 7>;
defm : SKXWriteResPair<WritePHAdd, [SKXPort5,SKXPort05], 3, [2,1], 3, 5>;
defm : SKXWriteResPair<WritePHAddX, [SKXPort5,SKXPort015], 3, [2,1], 3, 6>;
defm : SKXWriteResPair<WritePHAddY, [SKXPort5,SKXPort015], 3, [2,1], 3, 7>;
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 68155acd9e5b..b3b8486c604b 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -302,6 +302,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
.Case("0xc00", "falkor")
.Case("0xc01", "saphira")
+ .Case("0x001", "oryon-1")
.Default("generic");
if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
// The Exynos chips have a convoluted ID scheme that doesn't seem to follow
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 7464237d26d4..60a784ef002f 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -124,6 +124,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx1103"}, {"gfx1103"}, GK_GFX1103, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1150"}, {"gfx1150"}, GK_GFX1150, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1152"}, {"gfx1152"}, GK_GFX1152, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
@@ -275,6 +276,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX1103: return {11, 0, 3};
case GK_GFX1150: return {11, 5, 0};
case GK_GFX1151: return {11, 5, 1};
+ case GK_GFX1152: return {11, 5, 2};
case GK_GFX1200: return {12, 0, 0};
case GK_GFX1201: return {12, 0, 1};
@@ -341,6 +343,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["image-insts"] = true;
Features["fp8-conversion-insts"] = true;
break;
+ case GK_GFX1152:
case GK_GFX1151:
case GK_GFX1150:
case GK_GFX1103:
@@ -542,6 +545,7 @@ static bool isWave32Capable(StringRef GPU, const Triple &T) {
switch (parseArchAMDGCN(GPU)) {
case GK_GFX1201:
case GK_GFX1200:
+ case GK_GFX1152:
case GK_GFX1151:
case GK_GFX1150:
case GK_GFX1103:
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 5fbdbc3a014f..92a9697720ef 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_component_library(LLVMipo
DeadArgumentElimination.cpp
ElimAvailExtern.cpp
EmbedBitcodePass.cpp
+ ExpandVariadics.cpp
ExtractGV.cpp
ForceFunctionAttrs.cpp
FunctionAttrs.cpp
diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
new file mode 100644
index 000000000000..d340bc041ccd
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
@@ -0,0 +1,1012 @@
+//===-- ExpandVariadicsPass.cpp --------------------------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an optimization pass for variadic functions. If called from codegen,
+// it can serve as the implementation of variadic functions for a given target.
+//
+// The strategy is to turn the ... part of a variadic function into a va_list
+// and fix up the call sites. The majority of the pass is target independent.
+// The exceptions are the va_list type itself and the rules for where to store
+// variables in memory such that va_arg can iterate over them given a va_list.
+//
+// The majority of the plumbing is splitting the variadic function into a
+// single basic block that packs the variadic arguments into a va_list and
+// a second function that does the work of the original. That packing is
+// exactly what is done by va_start. Further, the transform from ... to va_list
+// replaced va_start with an operation to copy a va_list from the new argument,
+// which is exactly a va_copy. This is useful for reducing target-dependence.
+//
+// A va_list instance is a forward iterator, where the primary operation va_arg
+// is dereference-then-increment. This interface forces significant convergent
+// evolution between target specific implementations. The variation in runtime
+// data layout is limited to that representable by the iterator, parameterised
+// by the type passed to the va_arg instruction.
+//
+// Therefore the majority of the target specific subtlety is packing arguments
+// into a stack allocated buffer such that a va_list can be initialised with it
+// and the va_arg expansion for the target will find the arguments at runtime.
+//
+// The aggregate effect is to unblock other transforms, most critically the
+// general purpose inliner. Known calls to variadic functions become zero cost.
+//
+// Consistency with clang is primarily tested by emitting va_arg using clang
+// then expanding the variadic functions using this pass, followed by trying
+// to constant fold the functions to no-ops.
+//
+// Target specific behaviour is tested in IR - mainly checking that values are
+// put into positions in call frames that make sense for that particular target.
+//
+// There is one "clever" invariant in use. va_start intrinsics that are not
+// within a variadic function are an error in the IR verifier. When this
+// transform moves blocks from a variadic function into a fixed arity one, it
+// moves va_start intrinsics along with everything else. That means that the
+// va_start intrinsics that need to be rewritten to use the trailing argument
+// are exactly those that are in non-variadic functions so no further state
+// is needed to distinguish those that need to be rewritten.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#define DEBUG_TYPE "expand-variadics"
+
+using namespace llvm;
+
+namespace {
+
+cl::opt<ExpandVariadicsMode> ExpandVariadicsModeOption(
+ DEBUG_TYPE "-override", cl::desc("Override the behaviour of " DEBUG_TYPE),
+ cl::init(ExpandVariadicsMode::Unspecified),
+ cl::values(clEnumValN(ExpandVariadicsMode::Unspecified, "unspecified",
+ "Use the implementation defaults"),
+ clEnumValN(ExpandVariadicsMode::Disable, "disable",
+ "Disable the pass entirely"),
+ clEnumValN(ExpandVariadicsMode::Optimize, "optimize",
+ "Optimise without changing ABI"),
+ clEnumValN(ExpandVariadicsMode::Lowering, "lowering",
+ "Change variadic calling convention")));
+
+bool commandLineOverride() {
+ return ExpandVariadicsModeOption != ExpandVariadicsMode::Unspecified;
+}
+
+// Instances of this class encapsulate the target-dependent behaviour as a
+// function of triple. Implementing a new ABI is adding a case to the switch
+// in create(llvm::Triple) at the end of this file.
+// This class may end up instantiated in TargetMachine instances, keeping it
+// here for now until enough targets are implemented for the API to evolve.
+class VariadicABIInfo {
+protected:
+ VariadicABIInfo() = default;
+
+public:
+ static std::unique_ptr<VariadicABIInfo> create(const Triple &T);
+
+ // Allow overriding whether the pass runs on a per-target basis
+ virtual bool enableForTarget() = 0;
+
+ // Whether a valist instance is passed by value or by address
+ // I.e. does it need to be alloca'ed and stored into, or can
+ // it be passed directly in a SSA register
+ virtual bool vaListPassedInSSARegister() = 0;
+
+ // The type of a va_list iterator object
+ virtual Type *vaListType(LLVMContext &Ctx) = 0;
+
+ // The type of a va_list as a function argument as lowered by C
+ virtual Type *vaListParameterType(Module &M) = 0;
+
+ // Initialize an allocated va_list object to point to an already
+ // initialized contiguous memory region.
+ // Return the value to pass as the va_list argument
+ virtual Value *initializeVaList(Module &M, LLVMContext &Ctx,
+ IRBuilder<> &Builder, AllocaInst *VaList,
+ Value *Buffer) = 0;
+
+ struct VAArgSlotInfo {
+ Align DataAlign; // With respect to the call frame
+ bool Indirect; // Passed via a pointer
+ };
+ virtual VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) = 0;
+
+ // Targets implemented so far all have the same trivial lowering for these
+ bool vaEndIsNop() { return true; }
+ bool vaCopyIsMemcpy() { return true; }
+
+ virtual ~VariadicABIInfo() = default;
+};
+
+// Module implements getFunction() which returns nullptr on missing declaration
+// and getOrInsertFunction which creates one when absent. Intrinsics.h only
+// implements getDeclaration which creates one when missing. Checking whether
+// an intrinsic exists thus inserts it in the module and it then needs to be
+// deleted again to clean up.
+// The right name for the two functions on intrinsics would match Module::,
+// but doing that in a single change would introduce nullptr dereferences
+// where currently there are none. The minimal collateral damage approach
+// would split the change over a release to help downstream branches. As it
+// is unclear what approach will be preferred, implementing the trivial
+// function here in the meantime to decouple from that discussion.
+Function *getPreexistingDeclaration(Module *M, Intrinsic::ID Id,
+ ArrayRef<Type *> Tys = {}) {
+ auto *FT = Intrinsic::getType(M->getContext(), Id, Tys);
+ return M->getFunction(Tys.empty() ? Intrinsic::getName(Id)
+ : Intrinsic::getName(Id, Tys, M, FT));
+}
+
+class ExpandVariadics : public ModulePass {
+
+ // The pass construction sets the default to optimize when called from middle
+ // end and lowering when called from the backend. The command line variable
+ // overrides that. This is useful for testing and debugging. It also allows
+ // building an application with variadic functions wholly removed if one
+ // has sufficient control over the dependencies, e.g. a statically linked
+ // clang that has no variadic function calls remaining in the binary.
+
+public:
+ static char ID;
+ const ExpandVariadicsMode Mode;
+ std::unique_ptr<VariadicABIInfo> ABI;
+
+ ExpandVariadics(ExpandVariadicsMode Mode)
+ : ModulePass(ID),
+ Mode(commandLineOverride() ? ExpandVariadicsModeOption : Mode) {}
+
+ StringRef getPassName() const override { return "Expand variadic functions"; }
+
+ bool rewriteABI() { return Mode == ExpandVariadicsMode::Lowering; }
+
+ bool runOnModule(Module &M) override;
+
+ bool runOnFunction(Module &M, IRBuilder<> &Builder, Function *F);
+
+ Function *replaceAllUsesWithNewDeclaration(Module &M,
+ Function *OriginalFunction);
+
+ Function *deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
+ Function *OriginalFunction);
+
+ Function *defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
+ Function *VariadicWrapper,
+ Function *FixedArityReplacement);
+
+ bool expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB, FunctionType *,
+ Function *NF);
+
+ // The intrinsic functions va_copy and va_end are removed unconditionally.
+ // They correspond to a memcpy and a no-op on all implemented targets.
+ // The va_start intrinsic is removed from basic blocks that were not created
+ // by this pass, some may remain if needed to maintain the external ABI.
+
+ template <Intrinsic::ID ID, typename InstructionType>
+ bool expandIntrinsicUsers(Module &M, IRBuilder<> &Builder,
+ PointerType *IntrinsicArgType) {
+ bool Changed = false;
+ const DataLayout &DL = M.getDataLayout();
+ if (Function *Intrinsic =
+ getPreexistingDeclaration(&M, ID, {IntrinsicArgType})) {
+ for (User *U : make_early_inc_range(Intrinsic->users()))
+ if (auto *I = dyn_cast<InstructionType>(U))
+ Changed |= expandVAIntrinsicCall(Builder, DL, I);
+
+ if (Intrinsic->use_empty())
+ Intrinsic->eraseFromParent();
+ }
+ return Changed;
+ }
+
+ bool expandVAIntrinsicUsersWithAddrspace(Module &M, IRBuilder<> &Builder,
+ unsigned Addrspace) {
+ auto &Ctx = M.getContext();
+ PointerType *IntrinsicArgType = PointerType::get(Ctx, Addrspace);
+ bool Changed = false;
+
+ // expand vastart before vacopy as vastart may introduce a vacopy
+ Changed |= expandIntrinsicUsers<Intrinsic::vastart, VAStartInst>(
+ M, Builder, IntrinsicArgType);
+ Changed |= expandIntrinsicUsers<Intrinsic::vaend, VAEndInst>(
+ M, Builder, IntrinsicArgType);
+ Changed |= expandIntrinsicUsers<Intrinsic::vacopy, VACopyInst>(
+ M, Builder, IntrinsicArgType);
+ return Changed;
+ }
+
+ bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
+ VAStartInst *Inst);
+
+ bool expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
+ VAEndInst *Inst);
+
+ bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
+ VACopyInst *Inst);
+
+ FunctionType *inlinableVariadicFunctionType(Module &M, FunctionType *FTy) {
+ // The type of "FTy" with the ... removed and a va_list appended
+ SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
+ ArgTypes.push_back(ABI->vaListParameterType(M));
+ return FunctionType::get(FTy->getReturnType(), ArgTypes,
+ /*IsVarArgs=*/false);
+ }
+
+ static ConstantInt *sizeOfAlloca(LLVMContext &Ctx, const DataLayout &DL,
+ AllocaInst *Alloced) {
+ std::optional<TypeSize> AllocaTypeSize = Alloced->getAllocationSize(DL);
+ uint64_t AsInt = AllocaTypeSize ? AllocaTypeSize->getFixedValue() : 0;
+ return ConstantInt::get(Type::getInt64Ty(Ctx), AsInt);
+ }
+
+ bool expansionApplicableToFunction(Module &M, Function *F) {
+ if (F->isIntrinsic() || !F->isVarArg() ||
+ F->hasFnAttribute(Attribute::Naked))
+ return false;
+
+ if (F->getCallingConv() != CallingConv::C)
+ return false;
+
+ if (rewriteABI())
+ return true;
+
+ if (!F->hasExactDefinition())
+ return false;
+
+ return true;
+ }
+
+ bool expansionApplicableToFunctionCall(CallBase *CB) {
+ if (CallInst *CI = dyn_cast<CallInst>(CB)) {
+ if (CI->isMustTailCall()) {
+ // Cannot expand musttail calls
+ return false;
+ }
+
+ if (CI->getCallingConv() != CallingConv::C)
+ return false;
+
+ return true;
+ }
+
+ if (isa<InvokeInst>(CB)) {
+ // Invoke not implemented in initial implementation of pass
+ return false;
+ }
+
+ // Other unimplemented derivative of CallBase
+ return false;
+ }
+
+ class ExpandedCallFrame {
+ // Helper for constructing an alloca instance containing the arguments bound
+ // to the variadic ... parameter, rearranged to allow indexing through a
+ // va_list iterator
+ enum { N = 4 };
+ SmallVector<Type *, N> FieldTypes;
+ enum Tag { Store, Memcpy, Padding };
+ SmallVector<std::tuple<Value *, uint64_t, Tag>, N> Source;
+
+ template <Tag tag> void append(Type *FieldType, Value *V, uint64_t Bytes) {
+ FieldTypes.push_back(FieldType);
+ Source.push_back({V, Bytes, tag});
+ }
+
+ public:
+ void store(LLVMContext &Ctx, Type *T, Value *V) { append<Store>(T, V, 0); }
+
+ void memcpy(LLVMContext &Ctx, Type *T, Value *V, uint64_t Bytes) {
+ append<Memcpy>(T, V, Bytes);
+ }
+
+ void padding(LLVMContext &Ctx, uint64_t By) {
+ append<Padding>(ArrayType::get(Type::getInt8Ty(Ctx), By), nullptr, 0);
+ }
+
+ size_t size() const { return FieldTypes.size(); }
+ bool empty() const { return FieldTypes.empty(); }
+
+ StructType *asStruct(LLVMContext &Ctx, StringRef Name) {
+ const bool IsPacked = true;
+ return StructType::create(Ctx, FieldTypes,
+ (Twine(Name) + ".vararg").str(), IsPacked);
+ }
+
+ void initializeStructAlloca(const DataLayout &DL, IRBuilder<> &Builder,
+ AllocaInst *Alloced) {
+
+ StructType *VarargsTy = cast<StructType>(Alloced->getAllocatedType());
+
+ for (size_t I = 0; I < size(); I++) {
+
+ auto [V, bytes, tag] = Source[I];
+
+ if (tag == Padding) {
+ assert(V == nullptr);
+ continue;
+ }
+
+ auto Dst = Builder.CreateStructGEP(VarargsTy, Alloced, I);
+
+ assert(V != nullptr);
+
+ if (tag == Store)
+ Builder.CreateStore(V, Dst);
+
+ if (tag == Memcpy)
+ Builder.CreateMemCpy(Dst, {}, V, {}, bytes);
+ }
+ }
+ };
+};
+
+bool ExpandVariadics::runOnModule(Module &M) {
+ bool Changed = false;
+ if (Mode == ExpandVariadicsMode::Disable)
+ return Changed;
+
+ Triple TT(M.getTargetTriple());
+ ABI = VariadicABIInfo::create(TT);
+ if (!ABI)
+ return Changed;
+
+ if (!ABI->enableForTarget())
+ return Changed;
+
+ auto &Ctx = M.getContext();
+ const DataLayout &DL = M.getDataLayout();
+ IRBuilder<> Builder(Ctx);
+
+ // Lowering needs to run on all functions exactly once.
+ // Optimize could run on functions containing va_start exactly once.
+ for (Function &F : make_early_inc_range(M))
+ Changed |= runOnFunction(M, Builder, &F);
+
+ // After runOnFunction, all known calls to known variadic functions have been
+ // replaced. va_start intrinsics are presently (and invalidly!) only present
+ // in functions that used to be variadic and have now been replaced to take a
+ // va_list instead. If lowering as opposed to optimising, calls to unknown
+ // variadic functions have also been replaced.
+
+ {
+ // 0 and AllocaAddrSpace are sufficient for the targets implemented so far
+ unsigned Addrspace = 0;
+ Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);
+
+ Addrspace = DL.getAllocaAddrSpace();
+ if (Addrspace != 0)
+ Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);
+ }
+
+ if (Mode != ExpandVariadicsMode::Lowering)
+ return Changed;
+
+ for (Function &F : make_early_inc_range(M)) {
+ if (F.isDeclaration())
+ continue;
+
+ // Now need to track down indirect calls. Can't find those
+ // by walking uses of variadic functions, need to crawl the instruction
+ // stream. Fortunately this is only necessary for the ABI rewrite case.
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : make_early_inc_range(BB)) {
+ if (CallBase *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->isIndirectCall()) {
+ FunctionType *FTy = CB->getFunctionType();
+ if (FTy->isVarArg())
+ Changed |= expandCall(M, Builder, CB, FTy, 0);
+ }
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool ExpandVariadics::runOnFunction(Module &M, IRBuilder<> &Builder,
+ Function *OriginalFunction) {
+ bool Changed = false;
+
+ if (!expansionApplicableToFunction(M, OriginalFunction))
+ return Changed;
+
+ [[maybe_unused]] const bool OriginalFunctionIsDeclaration =
+ OriginalFunction->isDeclaration();
+ assert(rewriteABI() || !OriginalFunctionIsDeclaration);
+
+ // Declare a new function and redirect every use to that new function
+ Function *VariadicWrapper =
+ replaceAllUsesWithNewDeclaration(M, OriginalFunction);
+ assert(VariadicWrapper->isDeclaration());
+ assert(OriginalFunction->use_empty());
+
+ // Create a new function taking va_list containing the implementation of the
+ // original
+ Function *FixedArityReplacement =
+ deriveFixedArityReplacement(M, Builder, OriginalFunction);
+ assert(OriginalFunction->isDeclaration());
+ assert(FixedArityReplacement->isDeclaration() ==
+ OriginalFunctionIsDeclaration);
+ assert(VariadicWrapper->isDeclaration());
+
+ // Create a single block forwarding wrapper that turns a ... into a va_list
+ [[maybe_unused]] Function *VariadicWrapperDefine =
+ defineVariadicWrapper(M, Builder, VariadicWrapper, FixedArityReplacement);
+ assert(VariadicWrapperDefine == VariadicWrapper);
+ assert(!VariadicWrapper->isDeclaration());
+
+ // We now have:
+ // 1. the original function, now as a declaration with no uses
+ // 2. a variadic function that unconditionally calls a fixed arity replacement
+ // 3. a fixed arity function equivalent to the original function
+
+ // Replace known calls to the variadic with calls to the va_list equivalent
+ for (User *U : make_early_inc_range(VariadicWrapper->users())) {
+ if (CallBase *CB = dyn_cast<CallBase>(U)) {
+ Value *calledOperand = CB->getCalledOperand();
+ if (VariadicWrapper == calledOperand)
+ Changed |=
+ expandCall(M, Builder, CB, VariadicWrapper->getFunctionType(),
+ FixedArityReplacement);
+ }
+ }
+
+ // The original function will be erased.
+ // One of the two new functions will become a replacement for the original.
+ // When preserving the ABI, the other is an internal implementation detail.
+ // When rewriting the ABI, RAUW then the variadic one.
+ Function *const ExternallyAccessible =
+ rewriteABI() ? FixedArityReplacement : VariadicWrapper;
+ Function *const InternalOnly =
+ rewriteABI() ? VariadicWrapper : FixedArityReplacement;
+
+ // The external function is the replacement for the original
+ ExternallyAccessible->setLinkage(OriginalFunction->getLinkage());
+ ExternallyAccessible->setVisibility(OriginalFunction->getVisibility());
+ ExternallyAccessible->setComdat(OriginalFunction->getComdat());
+ ExternallyAccessible->takeName(OriginalFunction);
+
+ // Annotate the internal one as internal
+ InternalOnly->setVisibility(GlobalValue::DefaultVisibility);
+ InternalOnly->setLinkage(GlobalValue::InternalLinkage);
+
+ // The original is unused and obsolete
+ OriginalFunction->eraseFromParent();
+
+ InternalOnly->removeDeadConstantUsers();
+
+ if (rewriteABI()) {
+ // All known calls to the function have been removed by expandCall
+ // Resolve everything else by replaceAllUsesWith
+ VariadicWrapper->replaceAllUsesWith(FixedArityReplacement);
+ VariadicWrapper->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+Function *
+ExpandVariadics::replaceAllUsesWithNewDeclaration(Module &M,
+ Function *OriginalFunction) {
+ auto &Ctx = M.getContext();
+ Function &F = *OriginalFunction;
+ FunctionType *FTy = F.getFunctionType();
+ Function *NF = Function::Create(FTy, F.getLinkage(), F.getAddressSpace());
+
+ NF->setName(F.getName() + ".varargs");
+ NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
+
+ F.getParent()->getFunctionList().insert(F.getIterator(), NF);
+
+ AttrBuilder ParamAttrs(Ctx);
+ AttributeList Attrs = NF->getAttributes();
+ Attrs = Attrs.addParamAttributes(Ctx, FTy->getNumParams(), ParamAttrs);
+ NF->setAttributes(Attrs);
+
+ OriginalFunction->replaceAllUsesWith(NF);
+ return NF;
+}
+
+Function *
+ExpandVariadics::deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
+ Function *OriginalFunction) {
+ Function &F = *OriginalFunction;
+ // The purpose here is to split the variadic function F into two functions
+ // One is a variadic function that bundles the passed argument into a va_list
+ // and passes it to the second function. The second function does whatever
+ // the original F does, except that it takes a va_list instead of the ...
+
+ assert(expansionApplicableToFunction(M, &F));
+
+ auto &Ctx = M.getContext();
+
+ // Returned value isDeclaration() is equal to F.isDeclaration()
+ // but that property is not invariant throughout this function
+ const bool FunctionIsDefinition = !F.isDeclaration();
+
+ FunctionType *FTy = F.getFunctionType();
+ SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
+ ArgTypes.push_back(ABI->vaListParameterType(M));
+
+ FunctionType *NFTy = inlinableVariadicFunctionType(M, FTy);
+ Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace());
+
+ // Note - same attribute handling as DeadArgumentElimination
+ NF->copyAttributesFrom(&F);
+ NF->setComdat(F.getComdat());
+ F.getParent()->getFunctionList().insert(F.getIterator(), NF);
+ NF->setName(F.getName() + ".valist");
+ NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
+
+ AttrBuilder ParamAttrs(Ctx);
+
+ AttributeList Attrs = NF->getAttributes();
+ Attrs = Attrs.addParamAttributes(Ctx, NFTy->getNumParams() - 1, ParamAttrs);
+ NF->setAttributes(Attrs);
+
+ // Splice the implementation into the new function with minimal changes
+ if (FunctionIsDefinition) {
+ NF->splice(NF->begin(), &F);
+
+ auto NewArg = NF->arg_begin();
+ for (Argument &Arg : F.args()) {
+ Arg.replaceAllUsesWith(NewArg);
+ NewArg->setName(Arg.getName()); // takeName without killing the old one
+ ++NewArg;
+ }
+ NewArg->setName("varargs");
+ }
+
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ F.getAllMetadata(MDs);
+ for (auto [KindID, Node] : MDs)
+ NF->addMetadata(KindID, *Node);
+ F.clearMetadata();
+
+ return NF;
+}
+
+Function *
+ExpandVariadics::defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
+ Function *VariadicWrapper,
+ Function *FixedArityReplacement) {
+ auto &Ctx = Builder.getContext();
+ const DataLayout &DL = M.getDataLayout();
+ assert(VariadicWrapper->isDeclaration());
+ Function &F = *VariadicWrapper;
+
+ assert(F.isDeclaration());
+ Type *VaListTy = ABI->vaListType(Ctx);
+
+ auto *BB = BasicBlock::Create(Ctx, "entry", &F);
+ Builder.SetInsertPoint(BB);
+
+ AllocaInst *VaListInstance =
+ Builder.CreateAlloca(VaListTy, nullptr, "va_start");
+
+ Builder.CreateLifetimeStart(VaListInstance,
+ sizeOfAlloca(Ctx, DL, VaListInstance));
+
+ Builder.CreateIntrinsic(Intrinsic::vastart, {DL.getAllocaPtrType(Ctx)},
+ {VaListInstance});
+
+ SmallVector<Value *> Args;
+ for (Argument &A : F.args())
+ Args.push_back(&A);
+
+ Type *ParameterType = ABI->vaListParameterType(M);
+ if (ABI->vaListPassedInSSARegister())
+ Args.push_back(Builder.CreateLoad(ParameterType, VaListInstance));
+ else
+ Args.push_back(Builder.CreateAddrSpaceCast(VaListInstance, ParameterType));
+
+ CallInst *Result = Builder.CreateCall(FixedArityReplacement, Args);
+
+ Builder.CreateIntrinsic(Intrinsic::vaend, {DL.getAllocaPtrType(Ctx)},
+ {VaListInstance});
+ Builder.CreateLifetimeEnd(VaListInstance,
+ sizeOfAlloca(Ctx, DL, VaListInstance));
+
+ if (Result->getType()->isVoidTy())
+ Builder.CreateRetVoid();
+ else
+ Builder.CreateRet(Result);
+
+ return VariadicWrapper;
+}
+
+bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB,
+ FunctionType *VarargFunctionType,
+ Function *NF) {
+ bool Changed = false;
+ const DataLayout &DL = M.getDataLayout();
+
+ if (!expansionApplicableToFunctionCall(CB)) {
+ if (rewriteABI())
+ report_fatal_error("Cannot lower callbase instruction");
+ return Changed;
+ }
+
+ // This is tricky. The call instruction's function type might not match
+ // the type of the caller. When optimising, can leave it unchanged.
+ // Webassembly detects that inconsistency and repairs it.
+ FunctionType *FuncType = CB->getFunctionType();
+ if (FuncType != VarargFunctionType) {
+ if (!rewriteABI())
+ return Changed;
+ FuncType = VarargFunctionType;
+ }
+
+ auto &Ctx = CB->getContext();
+
+ Align MaxFieldAlign(1);
+
+ // The strategy is to allocate a call frame containing the variadic
+ // arguments laid out such that a target specific va_list can be initialized
+ // with it, such that target specific va_arg instructions will correctly
+ // iterate over it. This means getting the alignment right and sometimes
+ // embedding a pointer to the value instead of embedding the value itself.
+
+ Function *CBF = CB->getParent()->getParent();
+
+ ExpandedCallFrame Frame;
+
+ uint64_t CurrentOffset = 0;
+
+ for (unsigned I = FuncType->getNumParams(), E = CB->arg_size(); I < E; ++I) {
+ Value *ArgVal = CB->getArgOperand(I);
+ const bool IsByVal = CB->paramHasAttr(I, Attribute::ByVal);
+ const bool IsByRef = CB->paramHasAttr(I, Attribute::ByRef);
+
+ // The type of the value being passed, decoded from byval/byref metadata if
+ // required
+ Type *const UnderlyingType = IsByVal ? CB->getParamByValType(I)
+ : IsByRef ? CB->getParamByRefType(I)
+ : ArgVal->getType();
+ const uint64_t UnderlyingSize =
+ DL.getTypeAllocSize(UnderlyingType).getFixedValue();
+
+ // The type to be written into the call frame
+ Type *FrameFieldType = UnderlyingType;
+
+ // The value to copy from when initialising the frame alloca
+ Value *SourceValue = ArgVal;
+
+ VariadicABIInfo::VAArgSlotInfo SlotInfo = ABI->slotInfo(DL, UnderlyingType);
+
+ if (SlotInfo.Indirect) {
+ // The va_arg lowering loads through a pointer. Set up an alloca to aim
+ // that pointer at.
+ Builder.SetInsertPointPastAllocas(CBF);
+ Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+ Value *CallerCopy =
+ Builder.CreateAlloca(UnderlyingType, nullptr, "IndirectAlloca");
+
+ Builder.SetInsertPoint(CB);
+ if (IsByVal)
+ Builder.CreateMemCpy(CallerCopy, {}, ArgVal, {}, UnderlyingSize);
+ else
+ Builder.CreateStore(ArgVal, CallerCopy);
+
+ // Indirection now handled, pass the alloca ptr by value
+ FrameFieldType = DL.getAllocaPtrType(Ctx);
+ SourceValue = CallerCopy;
+ }
+
+ // Alignment of the value within the frame
+ // This probably needs to be controllable as a function of type
+ Align DataAlign = SlotInfo.DataAlign;
+
+ MaxFieldAlign = std::max(MaxFieldAlign, DataAlign);
+
+ uint64_t DataAlignV = DataAlign.value();
+ if (uint64_t Rem = CurrentOffset % DataAlignV) {
+ // Inject explicit padding to deal with alignment requirements
+ uint64_t Padding = DataAlignV - Rem;
+ Frame.padding(Ctx, Padding);
+ CurrentOffset += Padding;
+ }
+
+ if (SlotInfo.Indirect) {
+ Frame.store(Ctx, FrameFieldType, SourceValue);
+ } else {
+ if (IsByVal)
+ Frame.memcpy(Ctx, FrameFieldType, SourceValue, UnderlyingSize);
+ else
+ Frame.store(Ctx, FrameFieldType, SourceValue);
+ }
+
+ CurrentOffset += DL.getTypeAllocSize(FrameFieldType).getFixedValue();
+ }
+
+ if (Frame.empty()) {
+ // Not passing any arguments, hopefully va_arg won't try to read any
+ // Create a single byte frame containing nothing to point the va_list
+ // instance at, as that is less special-casey in the compiler and probably
+ // easier to interpret in a debugger.
+ Frame.padding(Ctx, 1);
+ }
+
+ StructType *VarargsTy = Frame.asStruct(Ctx, CBF->getName());
+
+ // The struct instance needs to be at least MaxFieldAlign for the alignment of
+ // the fields to be correct at runtime. Use the native stack alignment instead
+ // if that's greater as that tends to give better codegen.
+ // This is an awkward way to guess whether there is a known stack alignment
+ // without hitting an assert in DL.getStackAlignment, 1024 is an arbitrary
+ // number likely to be greater than the natural stack alignment.
+ // TODO: DL.getStackAlignment could return a MaybeAlign instead of assert
+ Align AllocaAlign = MaxFieldAlign;
+ if (DL.exceedsNaturalStackAlignment(Align(1024)))
+ AllocaAlign = std::max(AllocaAlign, DL.getStackAlignment());
+
+ // Put the alloca to hold the variadic args in the entry basic block.
+ Builder.SetInsertPointPastAllocas(CBF);
+
+ // SetCurrentDebugLocation when the builder SetInsertPoint method does not
+ Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+
+ // The awkward construction here is to set the alignment on the instance
+ AllocaInst *Alloced = Builder.Insert(
+ new AllocaInst(VarargsTy, DL.getAllocaAddrSpace(), nullptr, AllocaAlign),
+ "vararg_buffer");
+ Changed = true;
+ assert(Alloced->getAllocatedType() == VarargsTy);
+
+ // Initialize the fields in the struct
+ Builder.SetInsertPoint(CB);
+ Builder.CreateLifetimeStart(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
+ Frame.initializeStructAlloca(DL, Builder, Alloced);
+
+ const unsigned NumArgs = FuncType->getNumParams();
+ SmallVector<Value *> Args(CB->arg_begin(), CB->arg_begin() + NumArgs);
+
+ // Initialize a va_list pointing to that struct and pass it as the last
+ // argument
+ AllocaInst *VaList = nullptr;
+ {
+ if (!ABI->vaListPassedInSSARegister()) {
+ Type *VaListTy = ABI->vaListType(Ctx);
+ Builder.SetInsertPointPastAllocas(CBF);
+ Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+ VaList = Builder.CreateAlloca(VaListTy, nullptr, "va_argument");
+ Builder.SetInsertPoint(CB);
+ Builder.CreateLifetimeStart(VaList, sizeOfAlloca(Ctx, DL, VaList));
+ }
+ Builder.SetInsertPoint(CB);
+ Args.push_back(ABI->initializeVaList(M, Ctx, Builder, VaList, Alloced));
+ }
+
+ // Attributes excluding any on the vararg arguments
+ AttributeList PAL = CB->getAttributes();
+ if (!PAL.isEmpty()) {
+ SmallVector<AttributeSet, 8> ArgAttrs;
+ for (unsigned ArgNo = 0; ArgNo < NumArgs; ArgNo++)
+ ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
+ PAL =
+ AttributeList::get(Ctx, PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrs);
+ }
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CB->getOperandBundlesAsDefs(OpBundles);
+
+ CallBase *NewCB = nullptr;
+
+ if (CallInst *CI = dyn_cast<CallInst>(CB)) {
+ Value *Dst = NF ? NF : CI->getCalledOperand();
+ FunctionType *NFTy = inlinableVariadicFunctionType(M, VarargFunctionType);
+
+ NewCB = CallInst::Create(NFTy, Dst, Args, OpBundles, "", CI);
+
+ CallInst::TailCallKind TCK = CI->getTailCallKind();
+ assert(TCK != CallInst::TCK_MustTail);
+
+ // Can't tail call a function that is being passed a pointer to an alloca
+ if (TCK == CallInst::TCK_Tail)
+ TCK = CallInst::TCK_None;
+ CI->setTailCallKind(TCK);
+
+ } else {
+ llvm_unreachable("Unreachable when !expansionApplicableToFunctionCall()");
+ }
+
+ if (VaList)
+ Builder.CreateLifetimeEnd(VaList, sizeOfAlloca(Ctx, DL, VaList));
+
+ Builder.CreateLifetimeEnd(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
+
+ NewCB->setAttributes(PAL);
+ NewCB->takeName(CB);
+ NewCB->setCallingConv(CB->getCallingConv());
+ NewCB->setDebugLoc(DebugLoc());
+
+ // DeadArgElim and ArgPromotion copy exactly this metadata
+ NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
+
+ CB->replaceAllUsesWith(NewCB);
+ CB->eraseFromParent();
+ return Changed;
+}
+
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
+ const DataLayout &DL,
+ VAStartInst *Inst) {
+ // Only removing va_start instructions that are not in variadic functions.
+ // Those would be rejected by the IR verifier before this pass.
+ // After splicing basic blocks from a variadic function into a fixed arity
+ // one, the va_start that used to refer to the ... parameter still exists.
+ // There are also variadic functions that this pass did not change and
+ // va_start instances in the created single block wrapper functions.
+ // Replace exactly the instances in non-variadic functions as those are
+ // the ones to be fixed up to use the va_list passed as the final argument.
+
+ Function *ContainingFunction = Inst->getFunction();
+ if (ContainingFunction->isVarArg()) {
+ return false;
+ }
+
+ // The last argument is a vaListParameterType, either a va_list
+ // or a pointer to one depending on the target.
+ bool PassedByValue = ABI->vaListPassedInSSARegister();
+ Argument *PassedVaList =
+ ContainingFunction->getArg(ContainingFunction->arg_size() - 1);
+
+ // va_start takes a pointer to a va_list, e.g. one on the stack
+ Value *VaStartArg = Inst->getArgList();
+
+ Builder.SetInsertPoint(Inst);
+
+ if (PassedByValue) {
+ // The general thing to do is create an alloca, store the va_list argument
+ // to it, then create a va_copy. When vaCopyIsMemcpy(), this optimises to a
+ // store to the VaStartArg.
+ assert(ABI->vaCopyIsMemcpy());
+ Builder.CreateStore(PassedVaList, VaStartArg);
+ } else {
+
+ // Otherwise emit a vacopy to pick up target-specific handling if any
+ auto &Ctx = Builder.getContext();
+
+ Builder.CreateIntrinsic(Intrinsic::vacopy, {DL.getAllocaPtrType(Ctx)},
+ {VaStartArg, PassedVaList});
+ }
+
+ Inst->eraseFromParent();
+ return true;
+}
+
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
+ VAEndInst *Inst) {
+ assert(ABI->vaEndIsNop());
+ Inst->eraseFromParent();
+ return true;
+}
+
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
+ const DataLayout &DL,
+ VACopyInst *Inst) {
+ assert(ABI->vaCopyIsMemcpy());
+ Builder.SetInsertPoint(Inst);
+
+ auto &Ctx = Builder.getContext();
+ Type *VaListTy = ABI->vaListType(Ctx);
+ uint64_t Size = DL.getTypeAllocSize(VaListTy).getFixedValue();
+
+ Builder.CreateMemCpy(Inst->getDest(), {}, Inst->getSrc(), {},
+ Builder.getInt32(Size));
+
+ Inst->eraseFromParent();
+ return true;
+}
+
+struct Amdgpu final : public VariadicABIInfo {
+
+ bool enableForTarget() override { return true; }
+
+ bool vaListPassedInSSARegister() override { return true; }
+
+ Type *vaListType(LLVMContext &Ctx) override {
+ return PointerType::getUnqual(Ctx);
+ }
+
+ Type *vaListParameterType(Module &M) override {
+ return PointerType::getUnqual(M.getContext());
+ }
+
+ Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
+ AllocaInst * /*va_list*/, Value *Buffer) override {
+ // Given Buffer, which is an AllocaInst of vararg_buffer, we
+ // need to return something usable as the parameter type
+ return Builder.CreateAddrSpaceCast(Buffer, vaListParameterType(M));
+ }
+
+ VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
+ return {Align(4), false};
+ }
+};
+
+struct Wasm final : public VariadicABIInfo {
+
+ bool enableForTarget() override {
+ // Currently wasm is only used for testing.
+ return commandLineOverride();
+ }
+
+ bool vaListPassedInSSARegister() override { return true; }
+
+ Type *vaListType(LLVMContext &Ctx) override {
+ return PointerType::getUnqual(Ctx);
+ }
+
+ Type *vaListParameterType(Module &M) override {
+ return PointerType::getUnqual(M.getContext());
+ }
+
+ Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
+ AllocaInst * /*va_list*/, Value *Buffer) override {
+ return Buffer;
+ }
+
+ VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
+ LLVMContext &Ctx = Parameter->getContext();
+ const unsigned MinAlign = 4;
+ Align A = DL.getABITypeAlign(Parameter);
+ if (A < MinAlign)
+ A = Align(MinAlign);
+
+ if (auto s = dyn_cast<StructType>(Parameter)) {
+ if (s->getNumElements() > 1) {
+ return {DL.getABITypeAlign(PointerType::getUnqual(Ctx)), true};
+ }
+ }
+
+ return {A, false};
+ }
+};
+
+std::unique_ptr<VariadicABIInfo> VariadicABIInfo::create(const Triple &T) {
+ switch (T.getArch()) {
+ case Triple::r600:
+ case Triple::amdgcn: {
+ return std::make_unique<Amdgpu>();
+ }
+
+ case Triple::wasm32: {
+ return std::make_unique<Wasm>();
+ }
+
+ default:
+ return {};
+ }
+}
+
+} // namespace
+
+char ExpandVariadics::ID = 0;
+
+INITIALIZE_PASS(ExpandVariadics, DEBUG_TYPE, "Expand variadic functions", false,
+ false)
+
+ModulePass *llvm::createExpandVariadicsPass(ExpandVariadicsMode M) {
+ return new ExpandVariadics(M);
+}
+
+PreservedAnalyses ExpandVariadicsPass::run(Module &M, ModuleAnalysisManager &) {
+ return ExpandVariadics(Mode).runOnModule(M) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
+
+ExpandVariadicsPass::ExpandVariadicsPass(ExpandVariadicsMode M) : Mode(M) {}
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 03923b83cf34..f033d2b0d6d0 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -262,8 +262,70 @@ public:
// TODO: Should this be a map (from Caller node) for more efficient lookup?
std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
- // The set of IDs for contexts including this node.
- DenseSet<uint32_t> ContextIds;
+ // Get the list of edges from which we can compute allocation information
+ // such as the context ids and allocation type of this node.
+ const std::vector<std::shared_ptr<ContextEdge>> *
+ getEdgesWithAllocInfo() const {
+ // If node has any callees, compute from those, otherwise compute from
+ // callers (i.e. if this is the leaf allocation node).
+ if (!CalleeEdges.empty())
+ return &CalleeEdges;
+ if (!CallerEdges.empty()) {
+ // A node with caller edges but no callee edges must be the allocation
+ // node.
+ assert(IsAllocation);
+ return &CallerEdges;
+ }
+ return nullptr;
+ }
+
+ // Compute the context ids for this node from the union of its edge context
+ // ids.
+ DenseSet<uint32_t> getContextIds() const {
+ DenseSet<uint32_t> ContextIds;
+ auto *Edges = getEdgesWithAllocInfo();
+ if (!Edges)
+ return {};
+ unsigned Count = 0;
+ for (auto &Edge : *Edges)
+ Count += Edge->getContextIds().size();
+ ContextIds.reserve(Count);
+ for (auto &Edge : *Edges)
+ ContextIds.insert(Edge->getContextIds().begin(),
+ Edge->getContextIds().end());
+ return ContextIds;
+ }
+
+ // Compute the allocation type for this node from the OR of its edge
+ // allocation types.
+ uint8_t computeAllocType() const {
+ auto *Edges = getEdgesWithAllocInfo();
+ if (!Edges)
+ return (uint8_t)AllocationType::None;
+ uint8_t BothTypes =
+ (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
+ uint8_t AllocType = (uint8_t)AllocationType::None;
+ for (auto &Edge : *Edges) {
+ AllocType |= Edge->AllocTypes;
+ // Bail early if alloc type reached both, no further refinement.
+ if (AllocType == BothTypes)
+ return AllocType;
+ }
+ return AllocType;
+ }
+
+ // The context ids set for this node is empty if its edge context ids are
+ // also all empty.
+ bool emptyContextIds() const {
+ auto *Edges = getEdgesWithAllocInfo();
+ if (!Edges)
+ return true;
+ for (auto &Edge : *Edges) {
+ if (!Edge->getContextIds().empty())
+ return false;
+ }
+ return true;
+ }
// List of clones of this ContextNode, initially empty.
std::vector<ContextNode *> Clones;
@@ -308,15 +370,11 @@ public:
void printCall(raw_ostream &OS) const { Call.print(OS); }
// True if this node was effectively removed from the graph, in which case
- // its context id set, caller edges, and callee edges should all be empty.
+ // it should have an allocation type of None and empty context ids.
bool isRemoved() const {
- // Note that we can have non-empty context ids with empty caller and
- // callee edges if the graph ends up with a single node.
- if (ContextIds.empty())
- assert(CalleeEdges.empty() && CallerEdges.empty() &&
- "Context ids empty but at least one of callee and caller edges "
- "were not!");
- return ContextIds.empty();
+ assert((AllocTypes == (uint8_t)AllocationType::None) ==
+ emptyContextIds());
+ return AllocTypes == (uint8_t)AllocationType::None;
}
void dump() const;
@@ -429,7 +487,8 @@ private:
/// else to its callers. Also updates OrigNode's edges to remove any context
/// ids moved to the newly created edge.
void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
- bool TowardsCallee);
+ bool TowardsCallee,
+ DenseSet<uint32_t> RemainingContextIds);
/// Get the stack id corresponding to the given Id or Index (for IR this will
/// return itself, for a summary index this will return the id recorded in the
@@ -958,7 +1017,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
// Update alloc type and context ids for this MIB.
AllocNode->AllocTypes |= (uint8_t)AllocType;
- AllocNode->ContextIds.insert(LastContextId);
// Now add or update nodes for each stack id in alloc's context.
// Later when processing the stack ids on non-alloc callsites we will adjust
@@ -983,7 +1041,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
auto Ins = StackIdSet.insert(StackId);
if (!Ins.second)
StackNode->Recursive = true;
- StackNode->ContextIds.insert(LastContextId);
StackNode->AllocTypes |= (uint8_t)AllocType;
PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
PrevNode = StackNode;
@@ -1034,7 +1091,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
// it resulted in any added ids to NextNode.
if (!NewIdsToAdd.empty()) {
Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
- NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
UpdateCallers(NextNode, Visited, UpdateCallers);
}
}
@@ -1043,21 +1099,16 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
DenseSet<const ContextEdge *> Visited;
for (auto &Entry : AllocationCallToContextNodeMap) {
auto *Node = Entry.second;
- // Update ids on the allocation nodes before calling the recursive
- // update along caller edges, since this simplifies the logic during
- // that traversal.
- DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Node->ContextIds);
- Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
UpdateCallers(Node, Visited, UpdateCallers);
}
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
- ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) {
- // Make a copy of the context ids, since this will be adjusted below as they
- // are moved.
- DenseSet<uint32_t> RemainingContextIds = NewNode->ContextIds;
+ ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee,
+ // This must be passed by value to make a copy since it will be adjusted
+ // as ids are moved.
+ DenseSet<uint32_t> RemainingContextIds) {
auto &OrigEdges =
TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
// Increment iterator in loop so that we can remove edges as needed.
@@ -1104,6 +1155,51 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
+static void checkEdge(
+ const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
+ // Confirm that alloc type is not None and that we have at least one context
+ // id.
+ assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
+ assert(!Edge->ContextIds.empty());
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
+ bool CheckEdges = true) {
+ if (Node->isRemoved())
+ return;
+#ifndef NDEBUG
+ // Compute node's context ids once for use in asserts.
+ auto NodeContextIds = Node->getContextIds();
+#endif
+ // Node's context ids should be the union of both its callee and caller edge
+ // context ids.
+ if (Node->CallerEdges.size()) {
+ DenseSet<uint32_t> CallerEdgeContextIds(
+ Node->CallerEdges.front()->ContextIds);
+ for (const auto &Edge : llvm::drop_begin(Node->CallerEdges)) {
+ if (CheckEdges)
+ checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
+ set_union(CallerEdgeContextIds, Edge->ContextIds);
+ }
+ // Node can have more context ids than callers if some contexts terminate at
+ // node and some are longer.
+ assert(NodeContextIds == CallerEdgeContextIds ||
+ set_is_subset(CallerEdgeContextIds, NodeContextIds));
+ }
+ if (Node->CalleeEdges.size()) {
+ DenseSet<uint32_t> CalleeEdgeContextIds(
+ Node->CalleeEdges.front()->ContextIds);
+ for (const auto &Edge : llvm::drop_begin(Node->CalleeEdges)) {
+ if (CheckEdges)
+ checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
+ set_union(CalleeEdgeContextIds, Edge->getContextIds());
+ }
+ assert(NodeContextIds == CalleeEdgeContextIds);
+ }
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
assignStackNodesPostOrder(ContextNode *Node,
DenseSet<const ContextNode *> &Visited,
@@ -1178,7 +1274,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
// duplicated context ids. We have to recompute as we might have overlap
// overlap between the saved context ids for different last nodes, and
// removed them already during the post order traversal.
- set_intersect(SavedContextIds, FirstNode->ContextIds);
+ set_intersect(SavedContextIds, FirstNode->getContextIds());
ContextNode *PrevNode = nullptr;
for (auto Id : Ids) {
ContextNode *CurNode = getNodeForStackId(Id);
@@ -1211,18 +1307,17 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
ContextNode *NewNode = NodeOwner.back().get();
NodeToCallingFunc[NewNode] = Func;
NonAllocationCallToContextNodeMap[Call] = NewNode;
- NewNode->ContextIds = SavedContextIds;
- NewNode->AllocTypes = computeAllocType(NewNode->ContextIds);
+ NewNode->AllocTypes = computeAllocType(SavedContextIds);
// Connect to callees of innermost stack frame in inlined call chain.
// This updates context ids for FirstNode's callee's to reflect those
// moved to NewNode.
- connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true);
+ connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true, SavedContextIds);
// Connect to callers of outermost stack frame in inlined call chain.
// This updates context ids for FirstNode's caller's to reflect those
// moved to NewNode.
- connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false);
+ connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false, SavedContextIds);
// Now we need to remove context ids from edges/nodes between First and
// Last Node.
@@ -1234,18 +1329,32 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
// Remove the context ids moved to NewNode from CurNode, and the
// edge from the prior node.
- set_subtract(CurNode->ContextIds, NewNode->ContextIds);
if (PrevNode) {
auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
assert(PrevEdge);
- set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds);
+ set_subtract(PrevEdge->getContextIds(), SavedContextIds);
if (PrevEdge->getContextIds().empty()) {
PrevNode->eraseCallerEdge(PrevEdge);
CurNode->eraseCalleeEdge(PrevEdge);
}
}
+ // Since we update the edges from leaf to tail, only look at the callee
+ // edges. This isn't an alloc node, so if there are no callee edges, the
+ // alloc type is None.
+ CurNode->AllocTypes = CurNode->CalleeEdges.empty()
+ ? (uint8_t)AllocationType::None
+ : CurNode->computeAllocType();
PrevNode = CurNode;
}
+ if (VerifyNodes) {
+ checkNode<DerivedCCG, FuncTy, CallTy>(NewNode, /*CheckEdges=*/true);
+ for (auto Id : Ids) {
+ ContextNode *CurNode = getNodeForStackId(Id);
+ // We should only have kept stack ids that had nodes.
+ assert(CurNode);
+ checkNode<DerivedCCG, FuncTy, CallTy>(CurNode, /*CheckEdges=*/true);
+ }
+ }
}
}
@@ -1319,7 +1428,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
// Initialize the context ids with the last node's. We will subsequently
// refine the context ids by computing the intersection along all edges.
- DenseSet<uint32_t> LastNodeContextIds = LastNode->ContextIds;
+ DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
assert(!LastNodeContextIds.empty());
for (unsigned I = 0; I < Calls.size(); I++) {
@@ -1442,6 +1551,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
DenseSet<const ContextNode *> Visited;
for (auto &Entry : AllocationCallToContextNodeMap)
assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
+ if (VerifyCCG)
+ check();
}
uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
@@ -1786,8 +1897,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
// First check if we have already synthesized a node for this tail call.
if (TailCallToContextNodeMap.count(NewCall)) {
NewNode = TailCallToContextNodeMap[NewCall];
- NewNode->ContextIds.insert(Edge->ContextIds.begin(),
- Edge->ContextIds.end());
NewNode->AllocTypes |= Edge->AllocTypes;
} else {
FuncToCallsWithMetadata[Func].push_back({NewCall});
@@ -1797,7 +1906,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
NewNode = NodeOwner.back().get();
NodeToCallingFunc[NewNode] = Func;
TailCallToContextNodeMap[NewCall] = NewNode;
- NewNode->ContextIds = Edge->ContextIds;
NewNode->AllocTypes = Edge->AllocTypes;
}
@@ -2091,6 +2199,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
OS << "\n";
OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
OS << "\tContextIds:";
+ // Make a copy of the computed context ids that we can sort for stability.
+ auto ContextIds = getContextIds();
std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
std::sort(SortedIds.begin(), SortedIds.end());
for (auto Id : SortedIds)
@@ -2151,53 +2261,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
-static void checkEdge(
- const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
- // Confirm that alloc type is not None and that we have at least one context
- // id.
- assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
- assert(!Edge->ContextIds.empty());
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
- bool CheckEdges = true) {
- if (Node->isRemoved())
- return;
- // Node's context ids should be the union of both its callee and caller edge
- // context ids.
- if (Node->CallerEdges.size()) {
- auto EI = Node->CallerEdges.begin();
- auto &FirstEdge = *EI;
- EI++;
- DenseSet<uint32_t> CallerEdgeContextIds(FirstEdge->ContextIds);
- for (; EI != Node->CallerEdges.end(); EI++) {
- const auto &Edge = *EI;
- if (CheckEdges)
- checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
- set_union(CallerEdgeContextIds, Edge->ContextIds);
- }
- // Node can have more context ids than callers if some contexts terminate at
- // node and some are longer.
- assert(Node->ContextIds == CallerEdgeContextIds ||
- set_is_subset(CallerEdgeContextIds, Node->ContextIds));
- }
- if (Node->CalleeEdges.size()) {
- auto EI = Node->CalleeEdges.begin();
- auto &FirstEdge = *EI;
- EI++;
- DenseSet<uint32_t> CalleeEdgeContextIds(FirstEdge->ContextIds);
- for (; EI != Node->CalleeEdges.end(); EI++) {
- const auto &Edge = *EI;
- if (CheckEdges)
- checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
- set_union(CalleeEdgeContextIds, Edge->ContextIds);
- }
- assert(Node->ContextIds == CalleeEdgeContextIds);
- }
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
for (const auto Node : nodes<GraphType>(this)) {
@@ -2284,7 +2347,7 @@ struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>
static std::string getNodeAttributes(NodeRef Node, GraphType) {
std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " +
- getContextIds(Node->ContextIds) + "\"")
+ getContextIds(Node->getContextIds()) + "\"")
.str();
AttributeString +=
(Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str();
@@ -2443,16 +2506,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
set_subtract(Edge->ContextIds, ContextIdsToMove);
Edge->AllocTypes = computeAllocType(Edge->ContextIds);
}
- // Now perform some updates that are common to all cases: the NewCallee gets
- // the moved ids added, and we need to remove those ids from OldCallee and
- // update its alloc type (NewCallee alloc type updates handled above).
- NewCallee->ContextIds.insert(ContextIdsToMove.begin(),
- ContextIdsToMove.end());
- set_subtract(OldCallee->ContextIds, ContextIdsToMove);
- OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds);
- // OldCallee alloc type should be None iff its context id set is now empty.
- assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
- OldCallee->ContextIds.empty());
// Now walk the old callee node's callee edges and move Edge's context ids
// over to the corresponding edge into the clone (which is created here if
// this is a newly created clone).
@@ -2484,6 +2537,12 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
NewCallee->CalleeEdges.push_back(NewEdge);
NewEdge->Callee->CallerEdges.push_back(NewEdge);
}
+ // Recompute the node alloc type now that its callee edges have been
+ // updated (since we will compute from those edges).
+ OldCallee->AllocTypes = OldCallee->computeAllocType();
+ // OldCallee alloc type should be None iff its context id set is now empty.
+ assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
+ OldCallee->emptyContextIds());
if (VerifyCCG) {
checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false);
checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false);
@@ -2528,7 +2587,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
DenseSet<const ContextNode *> Visited;
for (auto &Entry : AllocationCallToContextNodeMap) {
Visited.clear();
- identifyClones(Entry.second, Visited, Entry.second->ContextIds);
+ identifyClones(Entry.second, Visited, Entry.second->getContextIds());
}
Visited.clear();
for (auto &Entry : AllocationCallToContextNodeMap)
@@ -2714,7 +2773,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
}
// We should still have some context ids on the original Node.
- assert(!Node->ContextIds.empty());
+ assert(!Node->emptyContextIds());
// Sanity check that no alloc types on node or edges are None.
assert(Node->AllocTypes != (uint8_t)AllocationType::None);
@@ -2918,7 +2977,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
// find additional cloning is required.
std::deque<ContextNode *> ClonesWorklist;
// Ignore original Node if we moved all of its contexts to clones.
- if (!Node->ContextIds.empty())
+ if (!Node->emptyContextIds())
ClonesWorklist.push_back(Node);
ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(),
Node->Clones.end());
@@ -3258,7 +3317,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
// Skip if either no call to update, or if we ended up with no context ids
// (we moved all edges onto other clones).
- if (!Node->hasCall() || Node->ContextIds.empty())
+ if (!Node->hasCall() || Node->emptyContextIds())
return;
if (Node->IsAllocation) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 89193f8ff94b..38c1c2644554 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4745,6 +4745,29 @@ static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &Q,
Constant::getNullValue(Op1->getType()));
}
+ if (!ICmpInst::isSigned(Pred))
+ return nullptr;
+
+ KnownBits KnownY = IC.computeKnownBits(A, /*Depth=*/0, &I);
+ // (X & NegY) spred X --> (X & NegY) upred X
+ if (KnownY.isNegative())
+ return new ICmpInst(ICmpInst::getUnsignedPredicate(Pred), Op0, Op1);
+
+ if (Pred != ICmpInst::ICMP_SLE && Pred != ICmpInst::ICMP_SGT)
+ return nullptr;
+
+ if (KnownY.isNonNegative())
+ // (X & PosY) s<= X --> X s>= 0
+ // (X & PosY) s> X --> X s< 0
+ return new ICmpInst(ICmpInst::getSwappedPredicate(Pred), Op1,
+ Constant::getNullValue(Op1->getType()));
+
+ if (isKnownNegative(Op1, IC.getSimplifyQuery().getWithInstruction(&I)))
+ // (NegX & Y) s<= NegX --> Y s< 0
+ // (NegX & Y) s> NegX --> Y s>= 0
+ return new ICmpInst(ICmpInst::getFlippedStrictnessPredicate(Pred), A,
+ Constant::getNullValue(A->getType()));
+
return nullptr;
}
@@ -4772,7 +4795,7 @@ static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q,
if (ICmpInst::isEquality(Pred) && Op0->hasOneUse()) {
// icmp (X | Y) eq/ne Y --> (X & ~Y) eq/ne 0 if Y is freely invertible
if (Value *NotOp1 =
- IC.getFreelyInverted(Op1, Op1->hasOneUse(), &IC.Builder))
+ IC.getFreelyInverted(Op1, !Op1->hasNUsesOrMore(3), &IC.Builder))
return new ICmpInst(Pred, IC.Builder.CreateAnd(A, NotOp1),
Constant::getNullValue(Op1->getType()));
// icmp (X | Y) eq/ne Y --> (~X | Y) eq/ne -1 if X is freely invertible.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 313beb7b6407..b04e0b300f95 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1288,22 +1288,36 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
Swapped = true;
}
- // In X == Y ? f(X) : Z, try to evaluate f(Y) and replace the operand.
- // Make sure Y cannot be undef though, as we might pick different values for
- // undef in the icmp and in f(Y). Additionally, take care to avoid replacing
- // X == Y ? X : Z with X == Y ? Y : Z, as that would lead to an infinite
- // replacement cycle.
Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1);
- if (TrueVal != CmpLHS &&
- isGuaranteedNotToBeUndefOrPoison(CmpRHS, SQ.AC, &Sel, &DT)) {
- if (Value *V = simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, SQ,
- /* AllowRefinement */ true))
- // Require either the replacement or the simplification result to be a
- // constant to avoid infinite loops.
- // FIXME: Make this check more precise.
- if (isa<Constant>(CmpRHS) || isa<Constant>(V))
+ auto ReplaceOldOpWithNewOp = [&](Value *OldOp,
+ Value *NewOp) -> Instruction * {
+ // In X == Y ? f(X) : Z, try to evaluate f(Y) and replace the operand.
+ // Take care to avoid replacing X == Y ? X : Z with X == Y ? Y : Z, as that
+ // would lead to an infinite replacement cycle.
+ // If we will be able to evaluate f(Y) to a constant, we can allow undef,
+ // otherwise Y cannot be undef as we might pick different values for undef
+ // in the icmp and in f(Y).
+ if (TrueVal == OldOp)
+ return nullptr;
+
+ if (Value *V = simplifyWithOpReplaced(TrueVal, OldOp, NewOp, SQ,
+ /* AllowRefinement=*/true)) {
+ // Need some guarantees about the new simplified op to ensure we don't inf
+ // loop.
+ // If we simplify to a constant, replace if we aren't creating new undef.
+ if (match(V, m_ImmConstant()) &&
+ isGuaranteedNotToBeUndef(V, SQ.AC, &Sel, &DT))
return replaceOperand(Sel, Swapped ? 2 : 1, V);
+      // If NewOp is a constant and OldOp is not, replace iff NewOp doesn't
+      // contain any undef elements.
+ if (match(NewOp, m_ImmConstant())) {
+ if (isGuaranteedNotToBeUndef(NewOp, SQ.AC, &Sel, &DT))
+ return replaceOperand(Sel, Swapped ? 2 : 1, V);
+ return nullptr;
+ }
+ }
+
// Even if TrueVal does not simplify, we can directly replace a use of
// CmpLHS with CmpRHS, as long as the instruction is not used anywhere
// else and is safe to speculatively execute (we may end up executing it
@@ -1311,17 +1325,18 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
// undefined behavior). Only do this if CmpRHS is a constant, as
// profitability is not clear for other cases.
// FIXME: Support vectors.
- if (match(CmpRHS, m_ImmConstant()) && !match(CmpLHS, m_ImmConstant()) &&
- !Cmp.getType()->isVectorTy())
- if (replaceInInstruction(TrueVal, CmpLHS, CmpRHS))
+ if (OldOp == CmpLHS && match(NewOp, m_ImmConstant()) &&
+ !match(OldOp, m_ImmConstant()) && !Cmp.getType()->isVectorTy() &&
+ isGuaranteedNotToBeUndef(NewOp, SQ.AC, &Sel, &DT))
+ if (replaceInInstruction(TrueVal, OldOp, NewOp))
return &Sel;
- }
- if (TrueVal != CmpRHS &&
- isGuaranteedNotToBeUndefOrPoison(CmpLHS, SQ.AC, &Sel, &DT))
- if (Value *V = simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, SQ,
- /* AllowRefinement */ true))
- if (isa<Constant>(CmpLHS) || isa<Constant>(V))
- return replaceOperand(Sel, Swapped ? 2 : 1, V);
+ return nullptr;
+ };
+
+ if (Instruction *R = ReplaceOldOpWithNewOp(CmpLHS, CmpRHS))
+ return R;
+ if (Instruction *R = ReplaceOldOpWithNewOp(CmpRHS, CmpLHS))
+ return R;
auto *FalseInst = dyn_cast<Instruction>(FalseVal);
if (!FalseInst)
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 2aa21759d56e..a0e63bf12400 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -337,13 +337,17 @@ private:
unsigned AccessSizeIndex,
Instruction *InsertBefore, DomTreeUpdater &DTU,
LoopInfo *LI);
- bool ignoreMemIntrinsic(MemIntrinsic *MI);
+ bool ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE, MemIntrinsic *MI);
void instrumentMemIntrinsic(MemIntrinsic *MI);
bool instrumentMemAccess(InterestingMemoryOperand &O, DomTreeUpdater &DTU,
LoopInfo *LI);
- bool ignoreAccess(Instruction *Inst, Value *Ptr);
+ bool ignoreAccessWithoutRemark(Instruction *Inst, Value *Ptr);
+ bool ignoreAccess(OptimizationRemarkEmitter &ORE, Instruction *Inst,
+ Value *Ptr);
+
void getInterestingMemoryOperands(
- Instruction *I, const TargetLibraryInfo &TLI,
+ OptimizationRemarkEmitter &ORE, Instruction *I,
+ const TargetLibraryInfo &TLI,
SmallVectorImpl<InterestingMemoryOperand> &Interesting);
void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
@@ -765,7 +769,8 @@ Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
return IRB.CreateLoad(PtrTy, GlobalDynamicAddress);
}
-bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
+bool HWAddressSanitizer::ignoreAccessWithoutRemark(Instruction *Inst,
+ Value *Ptr) {
// Do not instrument accesses from different address spaces; we cannot deal
// with them.
Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
@@ -795,8 +800,23 @@ bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
return false;
}
+bool HWAddressSanitizer::ignoreAccess(OptimizationRemarkEmitter &ORE,
+ Instruction *Inst, Value *Ptr) {
+ bool Ignored = ignoreAccessWithoutRemark(Inst, Ptr);
+ if (Ignored) {
+ ORE.emit(
+ [&]() { return OptimizationRemark(DEBUG_TYPE, "ignoreAccess", Inst); });
+ } else {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "ignoreAccess", Inst);
+ });
+ }
+ return Ignored;
+}
+
void HWAddressSanitizer::getInterestingMemoryOperands(
- Instruction *I, const TargetLibraryInfo &TLI,
+ OptimizationRemarkEmitter &ORE, Instruction *I,
+ const TargetLibraryInfo &TLI,
SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
// Skip memory accesses inserted by another instrumentation.
if (I->hasMetadata(LLVMContext::MD_nosanitize))
@@ -807,22 +827,22 @@ void HWAddressSanitizer::getInterestingMemoryOperands(
return;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
+ if (!ClInstrumentReads || ignoreAccess(ORE, I, LI->getPointerOperand()))
return;
Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
LI->getType(), LI->getAlign());
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
+ if (!ClInstrumentWrites || ignoreAccess(ORE, I, SI->getPointerOperand()))
return;
Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
SI->getValueOperand()->getType(), SI->getAlign());
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(ORE, I, RMW->getPointerOperand()))
return;
Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
RMW->getValOperand()->getType(), std::nullopt);
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(ORE, I, XCHG->getPointerOperand()))
return;
Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
XCHG->getCompareOperand()->getType(),
@@ -830,7 +850,7 @@ void HWAddressSanitizer::getInterestingMemoryOperands(
} else if (auto *CI = dyn_cast<CallInst>(I)) {
for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
- ignoreAccess(I, CI->getArgOperand(ArgNo)))
+ ignoreAccess(ORE, I, CI->getArgOperand(ArgNo)))
continue;
Type *Ty = CI->getParamByValType(ArgNo);
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
@@ -1035,13 +1055,14 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
->setSuccessor(0, TCI.TagMismatchTerm->getParent());
}
-bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
+bool HWAddressSanitizer::ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE,
+ MemIntrinsic *MI) {
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
- return (!ClInstrumentWrites || ignoreAccess(MTI, MTI->getDest())) &&
- (!ClInstrumentReads || ignoreAccess(MTI, MTI->getSource()));
+ return (!ClInstrumentWrites || ignoreAccess(ORE, MTI, MTI->getDest())) &&
+ (!ClInstrumentReads || ignoreAccess(ORE, MTI, MTI->getSource()));
}
if (isa<MemSetInst>(MI))
- return !ClInstrumentWrites || ignoreAccess(MI, MI->getDest());
+ return !ClInstrumentWrites || ignoreAccess(ORE, MI, MI->getDest());
return false;
}
@@ -1541,6 +1562,9 @@ void HWAddressSanitizer::sanitizeFunction(Function &F,
NumTotalFuncs++;
+ OptimizationRemarkEmitter &ORE =
+ FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
if (selectiveInstrumentationShouldSkip(F, FAM))
return;
@@ -1562,10 +1586,10 @@ void HWAddressSanitizer::sanitizeFunction(Function &F,
if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
LandingPadVec.push_back(&Inst);
- getInterestingMemoryOperands(&Inst, TLI, OperandsToInstrument);
+ getInterestingMemoryOperands(ORE, &Inst, TLI, OperandsToInstrument);
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
- if (!ignoreMemIntrinsic(MI))
+ if (!ignoreMemIntrinsic(ORE, MI))
IntrinToInstrument.push_back(MI);
}
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index c0a3bf8464d2..d70c6a7a0a15 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -759,7 +759,7 @@ static void readMemprof(Module &M, Function &F,
std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
// For the callsites we need to record the index of the associated frame in
// the frame array (see comments below where the map entries are added).
- std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
+ std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *, unsigned>>>
LocHashToCallSites;
for (auto &AI : MemProfRec->AllocSites) {
// Associate the allocation info with the leaf frame. The later matching
@@ -815,7 +815,7 @@ static void readMemprof(Module &M, Function &F,
// and another callsite).
std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
AllocInfoIter;
- std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
+ std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *,
unsigned>>>::iterator CallSitesIter;
for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
DIL = DIL->getInlinedAt()) {
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index ba2546b8db0e..4371b821eae6 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -827,7 +827,8 @@ private:
return false;
}
- if (Metrics.convergent) {
+ // FIXME: Allow jump threading with controlled convergence.
+ if (Metrics.Convergence != ConvergenceKind::None) {
LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
<< "convergent instructions.\n");
ORE->emit([&]() {
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 7b4c54370e48..f8e2f1f28088 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -327,8 +327,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
UnrollCostEstimator OuterUCE(L, TTI, EphValues, UP.BEInsns);
if (!InnerUCE.canUnroll() || !OuterUCE.canUnroll()) {
- LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
- << " which cannot be duplicated or have invalid cost.\n");
+ LLVM_DEBUG(dbgs() << " Loop not considered unrollable\n");
return LoopUnrollResult::Unmodified;
}
@@ -341,7 +340,10 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return LoopUnrollResult::Unmodified;
}
- if (InnerUCE.Convergent || OuterUCE.Convergent) {
+ // FIXME: The call to canUnroll() allows some controlled convergent
+ // operations, but we block them here for future changes.
+ if (InnerUCE.Convergence != ConvergenceKind::None ||
+ OuterUCE.Convergence != ConvergenceKind::None) {
LLVM_DEBUG(
dbgs() << " Not unrolling loop with convergent instructions.\n");
return LoopUnrollResult::Unmodified;
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 10fc9e9303e8..cbc35b6dd429 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -684,11 +684,15 @@ UnrollCostEstimator::UnrollCostEstimator(
const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
CodeMetrics Metrics;
for (BasicBlock *BB : L->blocks())
- Metrics.analyzeBasicBlock(BB, TTI, EphValues);
+ Metrics.analyzeBasicBlock(BB, TTI, EphValues, /* PrepareForLTO= */ false,
+ L);
NumInlineCandidates = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;
- Convergent = Metrics.convergent;
+ Convergence = Metrics.Convergence;
LoopSize = Metrics.NumInsts;
+ ConvergenceAllowsRuntime =
+ Metrics.Convergence != ConvergenceKind::Uncontrolled &&
+ !getLoopConvergenceHeart(L);
// Don't allow an estimate of size zero. This would allows unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
@@ -701,6 +705,25 @@ UnrollCostEstimator::UnrollCostEstimator(
LoopSize = BEInsns + 1;
}
+bool UnrollCostEstimator::canUnroll() const {
+ switch (Convergence) {
+ case ConvergenceKind::ExtendedLoop:
+ LLVM_DEBUG(dbgs() << " Convergence prevents unrolling.\n");
+ return false;
+ default:
+ break;
+ }
+ if (!LoopSize.isValid()) {
+ LLVM_DEBUG(dbgs() << " Invalid loop size prevents unrolling.\n");
+ return false;
+ }
+ if (NotDuplicatable) {
+ LLVM_DEBUG(dbgs() << " Non-duplicatable blocks prevent unrolling.\n");
+ return false;
+ }
+ return true;
+}
+
uint64_t UnrollCostEstimator::getUnrolledLoopSize(
const TargetTransformInfo::UnrollingPreferences &UP,
unsigned CountOverwrite) const {
@@ -1206,8 +1229,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
if (!UCE.canUnroll()) {
- LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
- << " which cannot be duplicated or have invalid cost.\n");
+ LLVM_DEBUG(dbgs() << " Loop not considered unrollable.\n");
return LoopUnrollResult::Unmodified;
}
@@ -1254,15 +1276,9 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
// is unsafe -- it adds a control-flow dependency to the convergent
// operation. Therefore restrict remainder loop (try unrolling without).
//
- // TODO: This is quite conservative. In practice, convergent_op()
- // is likely to be called unconditionally in the loop. In this
- // case, the program would be ill-formed (on most architectures)
- // unless n were the same on all threads in a thread group.
- // Assuming n is the same on all threads, any kind of unrolling is
- // safe. But currently llvm's notion of convergence isn't powerful
- // enough to express this.
- if (UCE.Convergent)
- UP.AllowRemainder = false;
+ // TODO: This is somewhat conservative; we could allow the remainder if the
+ // trip count is uniform.
+ UP.AllowRemainder &= UCE.ConvergenceAllowsRuntime;
// Try to find the trip count upper bound if we cannot find the exact trip
// count.
@@ -1282,6 +1298,8 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
if (!UP.Count)
return LoopUnrollResult::Unmodified;
+ UP.Runtime &= UCE.ConvergenceAllowsRuntime;
+
if (PP.PeelCount) {
assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
@@ -1324,11 +1342,16 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
// Unroll the loop.
Loop *RemainderLoop = nullptr;
+ UnrollLoopOptions ULO;
+ ULO.Count = UP.Count;
+ ULO.Force = UP.Force;
+ ULO.AllowExpensiveTripCount = UP.AllowExpensiveTripCount;
+ ULO.UnrollRemainder = UP.UnrollRemainder;
+ ULO.Runtime = UP.Runtime;
+ ULO.ForgetAllSCEV = ForgetAllSCEV;
+ ULO.Heart = getLoopConvergenceHeart(L);
LoopUnrollResult UnrollResult = UnrollLoop(
- L,
- {UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
- UP.UnrollRemainder, ForgetAllSCEV},
- LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
+ L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
if (UnrollResult == LoopUnrollResult::Unmodified)
return LoopUnrollResult::Unmodified;
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index eb471b259c7d..cfe63496a100 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1221,7 +1221,6 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
SmallPtrSet<const Value *, 4> ObjSet;
SmallVector<Metadata *, 4> Scopes, NoAliases;
- SmallSetVector<const Argument *, 4> NAPtrArgs;
for (const Value *V : PtrArgs) {
SmallVector<const Value *, 4> Objects;
getUnderlyingObjects(V, Objects, /* LI = */ nullptr);
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 08ba65d9483e..3d950b151cd3 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -460,7 +460,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
L->dump());
return Rotated;
}
- if (Metrics.convergent) {
+ if (Metrics.Convergence != ConvergenceKind::None) {
LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
"instructions: ";
L->dump());
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 1216538195fb..90d7b99e9d81 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -419,6 +419,26 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
}
}
+// Loops containing convergent instructions that are uncontrolled or controlled
+// from outside the loop must have a count that divides their TripMultiple.
+LLVM_ATTRIBUTE_USED
+static bool canHaveUnrollRemainder(const Loop *L) {
+ if (getLoopConvergenceHeart(L))
+ return false;
+
+ // Check for uncontrolled convergent operations.
+ for (auto &BB : L->blocks()) {
+ for (auto &I : *BB) {
+ if (isa<ConvergenceControlInst>(I))
+ return true;
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (CB->isConvergent())
+ return CB->getConvergenceControlToken();
+ }
+ }
+ return true;
+}
+
/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
@@ -564,19 +584,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
}
- // Loops containing convergent instructions cannot use runtime unrolling,
- // as the prologue/epilogue may add additional control-dependencies to
- // convergent operations.
- LLVM_DEBUG(
- {
- bool HasConvergent = false;
- for (auto &BB : L->blocks())
- for (auto &I : *BB)
- if (auto *CB = dyn_cast<CallBase>(&I))
- HasConvergent |= CB->isConvergent();
- assert((!HasConvergent || !ULO.Runtime) &&
- "Can't runtime unroll if loop contains a convergent operation.");
- });
+ assert((!ULO.Runtime || canHaveUnrollRemainder(L)) &&
+ "Can't runtime unroll if loop contains a convergent operation.");
bool EpilogProfitability =
UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
@@ -722,7 +731,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (OldLoop)
LoopsToSimplify.insert(NewLoops[OldLoop]);
- if (*BB == Header)
+ if (*BB == Header) {
// Loop over all of the PHI nodes in the block, changing them to use
// the incoming values from the previous block.
for (PHINode *OrigPHI : OrigPHINode) {
@@ -735,6 +744,16 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
NewPHI->eraseFromParent();
}
+ // Eliminate copies of the loop heart intrinsic, if any.
+ if (ULO.Heart) {
+ auto it = VMap.find(ULO.Heart);
+ assert(it != VMap.end());
+ Instruction *heartCopy = cast<Instruction>(it->second);
+ heartCopy->eraseFromParent();
+ VMap.erase(it);
+ }
+ }
+
// Update our running map of newest clones
LastValueMap[*BB] = New;
for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index e1af02829c1d..dd7150bc63ec 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -1016,12 +1016,17 @@ bool llvm::UnrollRuntimeLoopRemainder(
auto UnrollResult = LoopUnrollResult::Unmodified;
if (remainderLoop && UnrollRemainder) {
LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
- UnrollResult =
- UnrollLoop(remainderLoop,
- {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false,
- /*AllowExpensiveTripCount*/ false,
- /*UnrollRemainder*/ false, ForgetAllSCEV},
- LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
+ UnrollLoopOptions ULO;
+ ULO.Count = Count - 1;
+ ULO.Force = false;
+ ULO.Runtime = false;
+ ULO.AllowExpensiveTripCount = false;
+ ULO.UnrollRemainder = false;
+ ULO.ForgetAllSCEV = ForgetAllSCEV;
+ assert(!getLoopConvergenceHeart(L) &&
+ "A loop with a convergence heart does not allow runtime unrolling.");
+ UnrollResult = UnrollLoop(remainderLoop, ULO, LI, SE, DT, AC, TTI,
+ /*ORE*/ nullptr, PreserveLCSSA);
}
if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-vscale.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-vscale.mir
new file mode 100644
index 000000000000..9b7a44954afd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-vscale.mir
@@ -0,0 +1,113 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+
+...
+---
+name: sum_of_vscale
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: sum_of_vscale
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %sum:_(s64) = G_VSCALE i64 20
+ ; CHECK-NEXT: $x0 = COPY %sum(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %rhs:_(s64) = G_VSCALE i64 11
+ %lhs:_(s64) = G_VSCALE i64 9
+ %sum:_(s64) = nsw G_ADD %lhs(s64), %rhs(s64)
+ $x0 = COPY %sum(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: sum_of_vscale_multi_use
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: sum_of_vscale_multi_use
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %rhs:_(s64) = G_VSCALE i64 11
+ ; CHECK-NEXT: %lhs:_(s64) = G_VSCALE i64 9
+ ; CHECK-NEXT: %sum:_(s64) = nsw G_ADD %lhs, %rhs
+ ; CHECK-NEXT: $x0 = COPY %sum(s64)
+ ; CHECK-NEXT: $x1 = COPY %rhs(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %rhs:_(s64) = G_VSCALE i64 11
+ %lhs:_(s64) = G_VSCALE i64 9
+ %sum:_(s64) = nsw G_ADD %lhs(s64), %rhs(s64)
+ $x0 = COPY %sum(s64)
+ $x1 = COPY %rhs(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: mul_of_vscale
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: mul_of_vscale
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %mul:_(s64) = G_VSCALE i64 99
+ ; CHECK-NEXT: $x0 = COPY %mul(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %rhs:_(s64) = G_CONSTANT i64 11
+ %lhs:_(s64) = G_VSCALE i64 9
+ %mul:_(s64) = nsw G_MUL %lhs(s64), %rhs(s64)
+ $x0 = COPY %mul(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: sub_of_vscale
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: sub_of_vscale
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %x:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[VSCALE:%[0-9]+]]:_(s64) = G_VSCALE i64 -9
+ ; CHECK-NEXT: %sub:_(s64) = nsw G_ADD %x, [[VSCALE]]
+ ; CHECK-NEXT: $x0 = COPY %sub(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %x:_(s64) = COPY $x0
+ %rhs:_(s64) = G_VSCALE i64 9
+ %sub:_(s64) = nsw G_SUB %x(s64), %rhs(s64)
+ $x0 = COPY %sub(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: shl_of_vscale
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: shl_of_vscale
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %shl:_(s64) = G_VSCALE i64 44
+ ; CHECK-NEXT: $x0 = COPY %shl(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %rhs:_(s64) = G_CONSTANT i64 2
+ %lhs:_(s64) = G_VSCALE i64 11
+ %shl:_(s64) = nsw G_SHL %lhs(s64), %rhs(s64)
+ $x0 = COPY %shl(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: shl_of_vscale_wrong_flag
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: shl_of_vscale_wrong_flag
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %rhs:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: %lhs:_(s64) = G_VSCALE i64 11
+ ; CHECK-NEXT: %shl:_(s64) = nuw G_SHL %lhs, %rhs(s64)
+ ; CHECK-NEXT: $x0 = COPY %shl(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %rhs:_(s64) = G_CONSTANT i64 2
+ %lhs:_(s64) = G_VSCALE i64 11
+ %shl:_(s64) = nuw G_SHL %lhs(s64), %rhs(s64)
+ $x0 = COPY %shl(s64)
+ RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll b/llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
index 3b6c4fa875e6..dafdcf82f311 100644
--- a/llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
@@ -12,7 +12,7 @@ entry:
for.body:
; CHECK: for.body
-; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}]
+; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}]
; CHECK: add x[[REG:[0-9]+]],
; CHECK: x[[REG]], #1, lsl #12
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
diff --git a/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll b/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
index 8c7b31fd34c4..114203e46f19 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
@@ -176,13 +176,13 @@ exit:
; CHECK: ********** MI Scheduling **********
; CHECK: LDURDi_LDRDui:%bb.1 vector_body
;
-; CHECK: Cluster ld/st SU(2) - SU(6)
-; CHECK: Cluster ld/st SU(3) - SU(7)
+; CHECK: Cluster ld/st SU(0) - SU(4)
+; CHECK: Cluster ld/st SU(1) - SU(5)
;
-; CHECK: SU(2): %{{[0-9]+}}:fpr64 = LDURDi
-; CHECK: SU(3): %{{[0-9]+}}:fpr64 = LDURDi
-; CHECK: SU(6): %{{[0-9]+}}:fpr64 = LDRDui
-; CHECK: SU(7): %{{[0-9]+}}:fpr64 = LDRDui
+; CHECK: SU(0): %{{[0-9]+}}:fpr64 = LDURDi
+; CHECK: SU(1): %{{[0-9]+}}:fpr64 = LDURDi
+; CHECK: SU(4): %{{[0-9]+}}:fpr64 = LDRDui
+; CHECK: SU(5): %{{[0-9]+}}:fpr64 = LDRDui
;
define void @LDURDi_LDRDui(ptr nocapture readonly %arg) {
entry:
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
index ac2b21af29ab..2ef35283568c 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
@@ -15,36 +15,34 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
-; CHECK-NEXT: mov w9, #100 // =0x64
-; CHECK-NEXT: cntd x10
-; CHECK-NEXT: whilelo p1.d, xzr, x9
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: rdvl x11, #2
+; CHECK-NEXT: mov w8, #100 // =0x64
+; CHECK-NEXT: cntd x9
+; CHECK-NEXT: whilelo p1.d, xzr, x8
+; CHECK-NEXT: rdvl x10, #2
+; CHECK-NEXT: mov x11, x9
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x12, x10
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
-; CHECK-NEXT: add x13, x0, x8
-; CHECK-NEXT: add x14, x1, x8
-; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: mov z7.d, z0.d
-; CHECK-NEXT: ld1d { z2.d }, p2/z, [x13, #1, mul vl]
-; CHECK-NEXT: ld1d { z4.d }, p2/z, [x14, #1, mul vl]
-; CHECK-NEXT: add x8, x8, x11
-; CHECK-NEXT: ld1d { z3.d }, p1/z, [x13]
-; CHECK-NEXT: ld1d { z5.d }, p1/z, [x14]
+; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
+; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
+; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
+; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
+; CHECK-NEXT: add x1, x1, x10
+; CHECK-NEXT: add x0, x0, x10
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT: mov z0.d, p2/m, z7.d
; CHECK-NEXT: mov z1.d, p1/m, z6.d
-; CHECK-NEXT: whilelo p1.d, x12, x9
-; CHECK-NEXT: add x12, x12, x10
+; CHECK-NEXT: whilelo p1.d, x11, x8
+; CHECK-NEXT: add x11, x11, x9
; CHECK-NEXT: b.mi .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
@@ -114,39 +112,37 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
; CHECK-LABEL: complex_mul_predicated_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
-; CHECK-NEXT: cntd x10
-; CHECK-NEXT: mov w12, #100 // =0x64
-; CHECK-NEXT: neg x11, x10
+; CHECK-NEXT: cntd x9
+; CHECK-NEXT: mov w11, #100 // =0x64
+; CHECK-NEXT: neg x10, x9
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: mov x9, xzr
-; CHECK-NEXT: and x11, x11, x12
-; CHECK-NEXT: rdvl x12, #2
+; CHECK-NEXT: and x10, x10, x11
+; CHECK-NEXT: rdvl x11, #2
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x9, lsl #2]
-; CHECK-NEXT: add x13, x0, x8
-; CHECK-NEXT: add x14, x1, x8
+; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x8, lsl #2]
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: mov z7.d, z0.d
-; CHECK-NEXT: add x9, x9, x10
-; CHECK-NEXT: add x8, x8, x12
-; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
-; CHECK-NEXT: cmp x11, x9
-; CHECK-NEXT: zip2 p1.d, p2.d, p2.d
-; CHECK-NEXT: zip1 p2.d, p2.d, p2.d
-; CHECK-NEXT: ld1d { z2.d }, p1/z, [x13, #1, mul vl]
-; CHECK-NEXT: ld1d { z4.d }, p1/z, [x14, #1, mul vl]
-; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
-; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
+; CHECK-NEXT: add x8, x8, x9
+; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: cmp x10, x8
+; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
+; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
+; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
+; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
+; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
+; CHECK-NEXT: add x1, x1, x11
+; CHECK-NEXT: add x0, x0, x11
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z0.d, p1/m, z7.d
-; CHECK-NEXT: mov z1.d, p2/m, z6.d
+; CHECK-NEXT: mov z0.d, p2/m, z7.d
+; CHECK-NEXT: mov z1.d, p1/m, z6.d
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
@@ -218,38 +214,38 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
-; CHECK-NEXT: mov w10, #100 // =0x64
+; CHECK-NEXT: mov w8, #100 // =0x64
+; CHECK-NEXT: cntd x9
+; CHECK-NEXT: whilelo p1.d, xzr, x8
+; CHECK-NEXT: rdvl x10, #2
+; CHECK-NEXT: cnth x11
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: whilelo p1.d, xzr, x10
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: mov x9, xzr
-; CHECK-NEXT: cntd x11
-; CHECK-NEXT: rdvl x12, #2
+; CHECK-NEXT: mov x12, x9
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2, x9, lsl #2]
-; CHECK-NEXT: add x13, x0, x8
-; CHECK-NEXT: add x14, x1, x8
+; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2]
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: mov z7.d, z0.d
-; CHECK-NEXT: add x9, x9, x11
-; CHECK-NEXT: add x8, x8, x12
-; CHECK-NEXT: cmpne p2.d, p1/z, z2.d, #0
-; CHECK-NEXT: zip2 p1.d, p2.d, p2.d
-; CHECK-NEXT: zip1 p2.d, p2.d, p2.d
-; CHECK-NEXT: ld1d { z2.d }, p1/z, [x13, #1, mul vl]
-; CHECK-NEXT: ld1d { z4.d }, p1/z, [x14, #1, mul vl]
-; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
-; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
+; CHECK-NEXT: add x2, x2, x11
+; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
+; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
+; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
+; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
+; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
+; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
+; CHECK-NEXT: add x1, x1, x10
+; CHECK-NEXT: add x0, x0, x10
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z0.d, p1/m, z7.d
-; CHECK-NEXT: whilelo p1.d, x9, x10
-; CHECK-NEXT: mov z1.d, p2/m, z6.d
+; CHECK-NEXT: mov z0.d, p2/m, z7.d
+; CHECK-NEXT: mov z1.d, p1/m, z6.d
+; CHECK-NEXT: whilelo p1.d, x12, x8
+; CHECK-NEXT: add x12, x12, x9
; CHECK-NEXT: b.mi .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
index af07519ad53d..8e26ef6b87ec 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -15,30 +15,27 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
-; CHECK-NEXT: cntd x9
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: neg x10, x9
-; CHECK-NEXT: mov w11, #100 // =0x64
+; CHECK-NEXT: cntd x8
+; CHECK-NEXT: mov w10, #100 // =0x64
+; CHECK-NEXT: neg x9, x8
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: and x10, x10, x11
-; CHECK-NEXT: rdvl x11, #2
+; CHECK-NEXT: and x9, x9, x10
+; CHECK-NEXT: rdvl x10, #2
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x12, x0, x8
-; CHECK-NEXT: add x13, x1, x8
-; CHECK-NEXT: ld1b { z2.b }, p1/z, [x0, x8]
-; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl]
-; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
-; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
-; CHECK-NEXT: subs x10, x10, x9
-; CHECK-NEXT: add x8, x8, x11
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #90
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0]
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: ld1d { z4.d }, p0/z, [x1, #1, mul vl]
+; CHECK-NEXT: ld1d { z5.d }, p0/z, [x1]
+; CHECK-NEXT: add x1, x1, x10
+; CHECK-NEXT: add x0, x0, x10
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
@@ -103,34 +100,31 @@ define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov d0, #1.00000000
; CHECK-NEXT: mov z1.d, #0 // =0x0
-; CHECK-NEXT: cntd x9
+; CHECK-NEXT: cntd x8
; CHECK-NEXT: fmov d2, #2.00000000
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: neg x10, x9
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: mov w11, #100 // =0x64
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: neg x9, x8
+; CHECK-NEXT: mov w10, #100 // =0x64
; CHECK-NEXT: sel z3.d, p0, z0.d, z1.d
-; CHECK-NEXT: and x10, x10, x11
-; CHECK-NEXT: rdvl x11, #2
+; CHECK-NEXT: and x9, x9, x10
+; CHECK-NEXT: rdvl x10, #2
; CHECK-NEXT: mov z1.d, p0/m, z2.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: zip2 z0.d, z1.d, z3.d
; CHECK-NEXT: zip1 z1.d, z1.d, z3.d
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x12, x0, x8
-; CHECK-NEXT: add x13, x1, x8
-; CHECK-NEXT: ld1b { z2.b }, p1/z, [x0, x8]
-; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl]
-; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
-; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
-; CHECK-NEXT: subs x10, x10, x9
-; CHECK-NEXT: add x8, x8, x11
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #90
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0]
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: ld1d { z4.d }, p0/z, [x1, #1, mul vl]
+; CHECK-NEXT: ld1d { z5.d }, p0/z, [x1]
+; CHECK-NEXT: add x1, x1, x10
+; CHECK-NEXT: add x0, x0, x10
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
@@ -190,45 +184,37 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64_unrolled:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
-; CHECK-NEXT: cntw x9
-; CHECK-NEXT: mov w11, #1000 // =0x3e8
-; CHECK-NEXT: neg x10, x9
-; CHECK-NEXT: rdvl x12, #2
-; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: cntw x8
+; CHECK-NEXT: mov w10, #1000 // =0x3e8
+; CHECK-NEXT: neg x9, x8
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: and x10, x10, x11
+; CHECK-NEXT: and x9, x9, x10
+; CHECK-NEXT: rdvl x10, #4
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
-; CHECK-NEXT: add x11, x1, x12
-; CHECK-NEXT: add x12, x0, x12
-; CHECK-NEXT: rdvl x13, #4
; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: mov z3.d, z0.d
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x14, x0, x8
-; CHECK-NEXT: add x15, x12, x8
-; CHECK-NEXT: add x16, x1, x8
-; CHECK-NEXT: add x17, x11, x8
-; CHECK-NEXT: ld1b { z4.b }, p1/z, [x0, x8]
-; CHECK-NEXT: ld1d { z5.d }, p0/z, [x14, #1, mul vl]
-; CHECK-NEXT: ld1b { z6.b }, p1/z, [x12, x8]
-; CHECK-NEXT: ld1b { z7.b }, p1/z, [x1, x8]
-; CHECK-NEXT: ld1d { z16.d }, p0/z, [x16, #1, mul vl]
-; CHECK-NEXT: ld1d { z17.d }, p0/z, [x15, #1, mul vl]
-; CHECK-NEXT: ld1b { z18.b }, p1/z, [x11, x8]
-; CHECK-NEXT: ld1d { z19.d }, p0/z, [x17, #1, mul vl]
-; CHECK-NEXT: subs x10, x10, x9
-; CHECK-NEXT: add x8, x8, x13
-; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #0
-; CHECK-NEXT: fcmla z2.d, p0/m, z18.d, z6.d, #0
-; CHECK-NEXT: fcmla z3.d, p0/m, z19.d, z17.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #90
-; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #90
-; CHECK-NEXT: fcmla z2.d, p0/m, z18.d, z6.d, #90
-; CHECK-NEXT: fcmla z3.d, p0/m, z19.d, z17.d, #90
+; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0]
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: ld1d { z6.d }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT: ld1d { z7.d }, p0/z, [x1, #1, mul vl]
+; CHECK-NEXT: ld1d { z16.d }, p0/z, [x1]
+; CHECK-NEXT: ld1d { z17.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: add x0, x0, x10
+; CHECK-NEXT: ld1d { z18.d }, p0/z, [x1, #3, mul vl]
+; CHECK-NEXT: ld1d { z19.d }, p0/z, [x1, #2, mul vl]
+; CHECK-NEXT: add x1, x1, x10
+; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #0
+; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #0
+; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #90
+; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #90
+; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #90
; CHECK-NEXT: b.ne .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
index 44d0a9392ba6..aed3072bb4af 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
@@ -148,17 +148,16 @@ define %"struct.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI2_0]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: add x8, x0, #32
+; CHECK-NEXT: add x9, x1, #32
+; CHECK-NEXT: mov x10, #-100 // =0xffffffffffffff9c
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8
-; CHECK-NEXT: add x10, x1, x8
-; CHECK-NEXT: add x8, x8, #64
-; CHECK-NEXT: ldp q5, q4, [x9]
-; CHECK-NEXT: cmp x8, #1600
-; CHECK-NEXT: ldp q7, q6, [x10]
-; CHECK-NEXT: ldp q17, q16, [x9, #32]
-; CHECK-NEXT: ldp q19, q18, [x10, #32]
+; CHECK-NEXT: ldp q5, q4, [x8, #-32]
+; CHECK-NEXT: adds x10, x10, #4
+; CHECK-NEXT: ldp q7, q6, [x9, #-32]
+; CHECK-NEXT: ldp q17, q16, [x8], #64
+; CHECK-NEXT: ldp q19, q18, [x9], #64
; CHECK-NEXT: fcmla v2.2d, v7.2d, v5.2d, #0
; CHECK-NEXT: fcmla v0.2d, v6.2d, v4.2d, #0
; CHECK-NEXT: fcmla v1.2d, v19.2d, v17.2d, #0
diff --git a/llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll b/llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll
index 7535638137ca..63c65334afe1 100644
--- a/llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll
+++ b/llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll
@@ -25,6 +25,25 @@ define void @test_sme_calling_convention_x0() nounwind {
ret void
}
+define i64 @test_sme_calling_convention_x1() nounwind {
+; CHECK-LABEL: test_sme_calling_convention_x1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: bl __arm_get_current_vg
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+; DARWIN-LABEL: test_sme_calling_convention_x1:
+; DARWIN: stp x29, x30, [sp, #-16]!
+; DARWIN: bl ___arm_get_current_vg
+; DARWIN: ldp x29, x30, [sp], #16
+; DARWIN: ret
+;
+; CHECK-CSRMASK-LABEL: name: test_sme_calling_convention_x1
+; CHECK-CSRMASK: BL @__arm_get_current_vg, csr_aarch64_sme_abi_support_routines_preservemost_from_x1
+ %vg = call aarch64_sme_preservemost_from_x1 i64 @__arm_get_current_vg()
+ ret i64 %vg
+}
+
define i64 @test_sme_calling_convention_x2() nounwind {
; CHECK-LABEL: test_sme_calling_convention_x2:
; CHECK: // %bb.0:
@@ -46,4 +65,5 @@ define i64 @test_sme_calling_convention_x2() nounwind {
}
declare void @__arm_tpidr2_save()
+declare i64 @__arm_get_current_vg()
declare {i64, i64} @__arm_sme_state()
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index 4c02a5240ba6..c993051ccebf 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -648,7 +648,19 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind {
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK: // %bb.0:
-; CHECK-NEXT: b fmaxl
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-NEXT: bl __gttf2
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: b.le .LBB18_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: .LBB18_2:
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
%b = call nnan fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
ret fp128 %b
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index 18d40cb18ba6..0116be51dd69 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -648,7 +648,19 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind {
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK: // %bb.0:
-; CHECK-NEXT: b fminl
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-NEXT: bl __lttf2
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: b.ge .LBB18_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: .LBB18_2:
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
%b = call nnan fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
ret fp128 %b
}
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index 599bd811d7d5..66bb131ce724 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -1669,42 +1669,41 @@ define void @zext_v8i8_to_v8i64_with_add_in_sequence_in_loop(ptr %src, ptr %dst)
; CHECK-LABEL: zext_v8i8_to_v8i64_with_add_in_sequence_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh18:
-; CHECK-NEXT: adrp x9, lCPI17_0@PAGE
+; CHECK-NEXT: adrp x8, lCPI17_0@PAGE
; CHECK-NEXT: Lloh19:
-; CHECK-NEXT: adrp x10, lCPI17_1@PAGE
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: adrp x9, lCPI17_1@PAGE
+; CHECK-NEXT: mov w10, #128 ; =0x80
; CHECK-NEXT: Lloh20:
-; CHECK-NEXT: ldr q0, [x9, lCPI17_0@PAGEOFF]
+; CHECK-NEXT: ldr q0, [x8, lCPI17_0@PAGEOFF]
; CHECK-NEXT: Lloh21:
-; CHECK-NEXT: ldr q1, [x10, lCPI17_1@PAGEOFF]
+; CHECK-NEXT: ldr q1, [x9, lCPI17_1@PAGEOFF]
+; CHECK-NEXT: add x8, x1, #64
; CHECK-NEXT: add x9, x0, #8
; CHECK-NEXT: LBB17_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldp d2, d3, [x9, #-8]
-; CHECK-NEXT: add x10, x1, x8
-; CHECK-NEXT: ldp q6, q5, [x10, #32]
-; CHECK-NEXT: add x8, x8, #128
-; CHECK-NEXT: ldp q17, q16, [x10]
-; CHECK-NEXT: cmp x8, #1024
+; CHECK-NEXT: subs x10, x10, #16
+; CHECK-NEXT: ldp q6, q5, [x8, #-32]
+; CHECK-NEXT: add x9, x9, #16
+; CHECK-NEXT: ldp q17, q16, [x8, #-64]
; CHECK-NEXT: tbl.16b v4, { v2 }, v1
; CHECK-NEXT: tbl.16b v2, { v2 }, v0
; CHECK-NEXT: tbl.16b v7, { v3 }, v1
; CHECK-NEXT: tbl.16b v3, { v3 }, v0
-; CHECK-NEXT: add x9, x9, #16
; CHECK-NEXT: uaddw2.2d v5, v5, v4
; CHECK-NEXT: uaddw.2d v4, v6, v4
; CHECK-NEXT: uaddw2.2d v6, v16, v2
-; CHECK-NEXT: ldp q18, q16, [x10, #96]
+; CHECK-NEXT: ldp q18, q16, [x8, #32]
; CHECK-NEXT: uaddw.2d v2, v17, v2
-; CHECK-NEXT: stp q4, q5, [x10, #32]
+; CHECK-NEXT: stp q4, q5, [x8, #-32]
; CHECK-NEXT: uaddw2.2d v5, v16, v7
-; CHECK-NEXT: ldp q16, q4, [x10, #64]
+; CHECK-NEXT: ldp q16, q4, [x8]
; CHECK-NEXT: uaddw.2d v7, v18, v7
-; CHECK-NEXT: stp q2, q6, [x10]
+; CHECK-NEXT: stp q2, q6, [x8, #-64]
; CHECK-NEXT: uaddw2.2d v4, v4, v3
; CHECK-NEXT: uaddw.2d v2, v16, v3
-; CHECK-NEXT: stp q7, q5, [x10, #96]
-; CHECK-NEXT: stp q2, q4, [x10, #64]
+; CHECK-NEXT: stp q7, q5, [x8, #32]
+; CHECK-NEXT: stp q2, q4, [x8], #128
; CHECK-NEXT: b.ne LBB17_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -1715,67 +1714,67 @@ define void @zext_v8i8_to_v8i64_with_add_in_sequence_in_loop(ptr %src, ptr %dst)
; CHECK-BE: // %bb.0: // %entry
; CHECK-BE-NEXT: adrp x9, .LCPI17_0
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI17_0
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
; CHECK-BE-NEXT: adrp x9, .LCPI17_1
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI17_1
; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
-; CHECK-BE-NEXT: add x9, x0, #8
+; CHECK-BE-NEXT: add x9, x1, #64
+; CHECK-BE-NEXT: add x10, x0, #8
; CHECK-BE-NEXT: .LBB17_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: sub x10, x9, #8
-; CHECK-BE-NEXT: ld1 { v2.8b }, [x9]
-; CHECK-BE-NEXT: add x9, x9, #16
-; CHECK-BE-NEXT: ld1 { v3.8b }, [x10]
-; CHECK-BE-NEXT: add x10, x1, x8
-; CHECK-BE-NEXT: add x8, x8, #128
-; CHECK-BE-NEXT: add x11, x10, #32
-; CHECK-BE-NEXT: add x14, x10, #64
-; CHECK-BE-NEXT: add x15, x10, #96
+; CHECK-BE-NEXT: ld1 { v2.8b }, [x10]
+; CHECK-BE-NEXT: sub x11, x10, #8
+; CHECK-BE-NEXT: add x15, x9, #32
+; CHECK-BE-NEXT: ld1 { v3.8b }, [x11]
+; CHECK-BE-NEXT: ld1 { v16.2d }, [x15]
+; CHECK-BE-NEXT: sub x11, x9, #64
+; CHECK-BE-NEXT: sub x12, x9, #32
+; CHECK-BE-NEXT: ld1 { v6.2d }, [x9]
+; CHECK-BE-NEXT: ld1 { v21.2d }, [x11]
; CHECK-BE-NEXT: tbl v4.16b, { v2.16b }, v1.16b
; CHECK-BE-NEXT: tbl v2.16b, { v2.16b }, v0.16b
-; CHECK-BE-NEXT: ld1 { v5.2d }, [x10]
-; CHECK-BE-NEXT: tbl v6.16b, { v3.16b }, v1.16b
+; CHECK-BE-NEXT: ld1 { v19.2d }, [x12]
+; CHECK-BE-NEXT: tbl v5.16b, { v3.16b }, v1.16b
; CHECK-BE-NEXT: tbl v3.16b, { v3.16b }, v0.16b
-; CHECK-BE-NEXT: ld1 { v16.2d }, [x15]
-; CHECK-BE-NEXT: ld1 { v19.2d }, [x14]
-; CHECK-BE-NEXT: ld1 { v21.2d }, [x11]
-; CHECK-BE-NEXT: add x12, x10, #48
-; CHECK-BE-NEXT: add x13, x10, #16
-; CHECK-BE-NEXT: add x16, x10, #112
-; CHECK-BE-NEXT: add x17, x10, #80
+; CHECK-BE-NEXT: sub x13, x9, #16
+; CHECK-BE-NEXT: sub x14, x9, #48
+; CHECK-BE-NEXT: add x16, x9, #48
+; CHECK-BE-NEXT: add x17, x9, #16
+; CHECK-BE-NEXT: ld1 { v22.2d }, [x13]
+; CHECK-BE-NEXT: subs x8, x8, #16
+; CHECK-BE-NEXT: add x10, x10, #16
; CHECK-BE-NEXT: rev32 v7.8b, v4.8b
; CHECK-BE-NEXT: ext v4.16b, v4.16b, v4.16b, #8
; CHECK-BE-NEXT: rev32 v17.8b, v2.8b
-; CHECK-BE-NEXT: ext v18.16b, v6.16b, v6.16b, #8
+; CHECK-BE-NEXT: ext v18.16b, v5.16b, v5.16b, #8
; CHECK-BE-NEXT: ext v20.16b, v3.16b, v3.16b, #8
; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT: rev32 v6.8b, v6.8b
+; CHECK-BE-NEXT: rev32 v5.8b, v5.8b
; CHECK-BE-NEXT: rev32 v3.8b, v3.8b
-; CHECK-BE-NEXT: ld1 { v22.2d }, [x12]
-; CHECK-BE-NEXT: cmp x8, #1024
-; CHECK-BE-NEXT: rev32 v4.8b, v4.8b
; CHECK-BE-NEXT: uaddw v7.2d, v16.2d, v7.2s
-; CHECK-BE-NEXT: ld1 { v16.2d }, [x16]
-; CHECK-BE-NEXT: rev32 v18.8b, v18.8b
+; CHECK-BE-NEXT: rev32 v4.8b, v4.8b
+; CHECK-BE-NEXT: uaddw v6.2d, v6.2d, v17.2s
+; CHECK-BE-NEXT: rev32 v17.8b, v18.8b
; CHECK-BE-NEXT: rev32 v20.8b, v20.8b
; CHECK-BE-NEXT: rev32 v2.8b, v2.8b
-; CHECK-BE-NEXT: uaddw v17.2d, v19.2d, v17.2s
-; CHECK-BE-NEXT: ld1 { v19.2d }, [x13]
-; CHECK-BE-NEXT: uaddw v6.2d, v21.2d, v6.2s
-; CHECK-BE-NEXT: uaddw v3.2d, v5.2d, v3.2s
-; CHECK-BE-NEXT: ld1 { v5.2d }, [x17]
+; CHECK-BE-NEXT: ld1 { v16.2d }, [x16]
+; CHECK-BE-NEXT: ld1 { v18.2d }, [x14]
+; CHECK-BE-NEXT: uaddw v5.2d, v19.2d, v5.2s
+; CHECK-BE-NEXT: uaddw v3.2d, v21.2d, v3.2s
; CHECK-BE-NEXT: st1 { v7.2d }, [x15]
+; CHECK-BE-NEXT: ld1 { v7.2d }, [x17]
+; CHECK-BE-NEXT: st1 { v6.2d }, [x9]
+; CHECK-BE-NEXT: add x9, x9, #128
; CHECK-BE-NEXT: uaddw v4.2d, v16.2d, v4.2s
-; CHECK-BE-NEXT: st1 { v6.2d }, [x11]
-; CHECK-BE-NEXT: uaddw v6.2d, v22.2d, v18.2s
-; CHECK-BE-NEXT: st1 { v3.2d }, [x10]
-; CHECK-BE-NEXT: uaddw v3.2d, v19.2d, v20.2s
-; CHECK-BE-NEXT: uaddw v2.2d, v5.2d, v2.2s
-; CHECK-BE-NEXT: st1 { v17.2d }, [x14]
+; CHECK-BE-NEXT: st1 { v5.2d }, [x12]
+; CHECK-BE-NEXT: uaddw v5.2d, v22.2d, v17.2s
+; CHECK-BE-NEXT: st1 { v3.2d }, [x11]
+; CHECK-BE-NEXT: uaddw v3.2d, v18.2d, v20.2s
+; CHECK-BE-NEXT: uaddw v2.2d, v7.2d, v2.2s
; CHECK-BE-NEXT: st1 { v4.2d }, [x16]
-; CHECK-BE-NEXT: st1 { v6.2d }, [x12]
-; CHECK-BE-NEXT: st1 { v3.2d }, [x13]
+; CHECK-BE-NEXT: st1 { v5.2d }, [x13]
+; CHECK-BE-NEXT: st1 { v3.2d }, [x14]
; CHECK-BE-NEXT: st1 { v2.2d }, [x17]
; CHECK-BE-NEXT: b.ne .LBB17_1
; CHECK-BE-NEXT: // %bb.2: // %exit
@@ -1813,14 +1812,14 @@ exit:
define void @zext_v16i8_to_v16i64_in_sequence_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v16i8_to_v16i64_in_sequence_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: add x9, x1, #128
+; CHECK-NEXT: add x10, x0, #16
; CHECK-NEXT: LBB18_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x10, x0, x8
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: ldp q0, q1, [x10]
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldp q0, q1, [x10, #-16]
+; CHECK-NEXT: subs x8, x8, #16
+; CHECK-NEXT: add x10, x10, #16
; CHECK-NEXT: ushll2.8h v2, v0, #0
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ushll2.8h v6, v1, #0
@@ -1863,18 +1862,18 @@ define void @zext_v16i8_to_v16i64_in_sequence_in_loop(ptr %src, ptr %dst) {
;
; CHECK-BE-LABEL: zext_v16i8_to_v16i64_in_sequence_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: add x9, x1, #128
+; CHECK-BE-NEXT: add x10, x0, #16
; CHECK-BE-NEXT: .LBB18_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x10, x0, x8
-; CHECK-BE-NEXT: sub x11, x9, #32
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v0.16b }, [x10]
-; CHECK-BE-NEXT: add x10, x10, #16
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: sub x11, x10, #16
; CHECK-BE-NEXT: ld1 { v5.16b }, [x10]
-; CHECK-BE-NEXT: sub x10, x9, #16
+; CHECK-BE-NEXT: sub x12, x9, #32
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x11]
+; CHECK-BE-NEXT: sub x11, x9, #16
+; CHECK-BE-NEXT: subs x8, x8, #16
+; CHECK-BE-NEXT: add x10, x10, #16
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
@@ -1885,54 +1884,54 @@ define void @zext_v16i8_to_v16i64_in_sequence_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-BE-NEXT: ushll2 v6.2d, v1.4s, #0
; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-BE-NEXT: st1 { v4.2d }, [x10]
+; CHECK-BE-NEXT: st1 { v4.2d }, [x11]
; CHECK-BE-NEXT: ushll2 v4.2d, v3.4s, #0
; CHECK-BE-NEXT: ushll v3.2d, v3.2s, #0
-; CHECK-BE-NEXT: st1 { v2.2d }, [x11]
+; CHECK-BE-NEXT: st1 { v2.2d }, [x12]
; CHECK-BE-NEXT: ushll2 v2.8h, v5.16b, #0
-; CHECK-BE-NEXT: sub x11, x9, #80
-; CHECK-BE-NEXT: sub x10, x9, #48
-; CHECK-BE-NEXT: st1 { v4.2d }, [x11]
+; CHECK-BE-NEXT: sub x12, x9, #80
+; CHECK-BE-NEXT: sub x11, x9, #48
+; CHECK-BE-NEXT: st1 { v4.2d }, [x12]
; CHECK-BE-NEXT: ushll v4.8h, v5.8b, #0
-; CHECK-BE-NEXT: sub x11, x9, #64
+; CHECK-BE-NEXT: sub x12, x9, #64
; CHECK-BE-NEXT: ushll2 v5.4s, v2.8h, #0
-; CHECK-BE-NEXT: st1 { v1.2d }, [x11]
-; CHECK-BE-NEXT: sub x11, x9, #96
+; CHECK-BE-NEXT: st1 { v1.2d }, [x12]
+; CHECK-BE-NEXT: sub x12, x9, #96
; CHECK-BE-NEXT: ushll2 v1.2d, v0.4s, #0
; CHECK-BE-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-BE-NEXT: st1 { v6.2d }, [x10]
-; CHECK-BE-NEXT: sub x10, x9, #128
-; CHECK-BE-NEXT: st1 { v3.2d }, [x11]
+; CHECK-BE-NEXT: st1 { v6.2d }, [x11]
+; CHECK-BE-NEXT: sub x11, x9, #128
+; CHECK-BE-NEXT: st1 { v3.2d }, [x12]
; CHECK-BE-NEXT: ushll2 v3.4s, v4.8h, #0
; CHECK-BE-NEXT: ushll2 v6.2d, v5.4s, #0
-; CHECK-BE-NEXT: sub x11, x9, #112
+; CHECK-BE-NEXT: sub x12, x9, #112
; CHECK-BE-NEXT: ushll v5.2d, v5.2s, #0
-; CHECK-BE-NEXT: st1 { v0.2d }, [x10]
-; CHECK-BE-NEXT: st1 { v1.2d }, [x11]
+; CHECK-BE-NEXT: st1 { v0.2d }, [x11]
+; CHECK-BE-NEXT: st1 { v1.2d }, [x12]
; CHECK-BE-NEXT: ushll2 v1.2d, v2.4s, #0
-; CHECK-BE-NEXT: add x10, x9, #112
+; CHECK-BE-NEXT: add x11, x9, #112
; CHECK-BE-NEXT: ushll v4.4s, v4.4h, #0
; CHECK-BE-NEXT: ushll2 v0.2d, v3.4s, #0
-; CHECK-BE-NEXT: st1 { v6.2d }, [x10]
-; CHECK-BE-NEXT: add x10, x9, #96
+; CHECK-BE-NEXT: st1 { v6.2d }, [x11]
+; CHECK-BE-NEXT: add x11, x9, #96
; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-BE-NEXT: ushll v3.2d, v3.2s, #0
-; CHECK-BE-NEXT: st1 { v5.2d }, [x10]
-; CHECK-BE-NEXT: add x10, x9, #80
-; CHECK-BE-NEXT: st1 { v1.2d }, [x10]
-; CHECK-BE-NEXT: add x10, x9, #48
+; CHECK-BE-NEXT: st1 { v5.2d }, [x11]
+; CHECK-BE-NEXT: add x11, x9, #80
+; CHECK-BE-NEXT: st1 { v1.2d }, [x11]
+; CHECK-BE-NEXT: add x11, x9, #48
; CHECK-BE-NEXT: ushll2 v1.2d, v4.4s, #0
-; CHECK-BE-NEXT: st1 { v0.2d }, [x10]
+; CHECK-BE-NEXT: st1 { v0.2d }, [x11]
; CHECK-BE-NEXT: ushll v0.2d, v4.2s, #0
-; CHECK-BE-NEXT: add x10, x9, #64
-; CHECK-BE-NEXT: st1 { v2.2d }, [x10]
-; CHECK-BE-NEXT: add x10, x9, #32
-; CHECK-BE-NEXT: st1 { v3.2d }, [x10]
-; CHECK-BE-NEXT: add x10, x9, #16
+; CHECK-BE-NEXT: add x11, x9, #64
+; CHECK-BE-NEXT: st1 { v2.2d }, [x11]
+; CHECK-BE-NEXT: add x11, x9, #32
+; CHECK-BE-NEXT: st1 { v3.2d }, [x11]
+; CHECK-BE-NEXT: add x11, x9, #16
; CHECK-BE-NEXT: st1 { v0.2d }, [x9]
; CHECK-BE-NEXT: add x9, x9, #128
-; CHECK-BE-NEXT: st1 { v1.2d }, [x10]
+; CHECK-BE-NEXT: st1 { v1.2d }, [x11]
; CHECK-BE-NEXT: b.ne .LBB18_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
index c347437c3082..40d77a7f51e9 100644
--- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
@@ -105,6 +105,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1103 < %s | FileCheck --check-prefixes=GFX1103 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1150 < %s | FileCheck --check-prefixes=GFX1150 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 < %s | FileCheck --check-prefixes=GFX1151 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1152 < %s | FileCheck --check-prefixes=GFX1152 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX1200 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1201 < %s | FileCheck --check-prefixes=GFX1201 %s
@@ -201,6 +202,7 @@
; GFX1103: .amdgcn_target "amdgcn-amd-amdhsa--gfx1103"
; GFX1150: .amdgcn_target "amdgcn-amd-amdhsa--gfx1150"
; GFX1151: .amdgcn_target "amdgcn-amd-amdhsa--gfx1151"
+; GFX1152: .amdgcn_target "amdgcn-amd-amdhsa--gfx1152"
; GFX1200: .amdgcn_target "amdgcn-amd-amdhsa--gfx1200"
; GFX1201: .amdgcn_target "amdgcn-amd-amdhsa--gfx1201"
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
index edc20153ffd7..560a05abd5e7 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
@@ -74,6 +74,7 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1103 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1103 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1150 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1150 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1151 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1151 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1152 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1152 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1200 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1200 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1201 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1201 %s
@@ -153,6 +154,7 @@
; GFX1103: EF_AMDGPU_MACH_AMDGCN_GFX1103 (0x44)
; GFX1150: EF_AMDGPU_MACH_AMDGCN_GFX1150 (0x43)
; GFX1151: EF_AMDGPU_MACH_AMDGCN_GFX1151 (0x4A)
+; GFX1152: EF_AMDGPU_MACH_AMDGCN_GFX1152 (0x55)
; GFX1200: EF_AMDGPU_MACH_AMDGCN_GFX1200 (0x48)
; GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E)
diff --git a/llvm/test/CodeGen/AMDGPU/expand-variadic-call.ll b/llvm/test/CodeGen/AMDGPU/expand-variadic-call.ll
new file mode 100644
index 000000000000..ce55558dabaf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/expand-variadic-call.ll
@@ -0,0 +1,545 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s
+; REQUIRES: amdgpu-registered-target
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+target triple = "amdgcn-amd-amdhsa"
+
+; Check the variables are lowered to the locations this target expects
+
+; The types show the call frames
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %single_v4f32.vararg = type <{ <4 x float> }>
+; CHECK: %single_v8f32.vararg = type <{ <8 x float> }>
+; CHECK: %single_v16f32.vararg = type <{ <16 x float> }>
+; CHECK: %single_v32f32.vararg = type <{ <32 x float> }>
+; CHECK: %i32_double.vararg = type <{ i32, double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %i32_libcS.vararg = type <{ i32, %struct.libcS }>
+; CHECK: %libcS_i32.vararg = type <{ %struct.libcS, i32 }>
+; CHECK: %i32_v4f32.vararg = type <{ i32, <4 x float> }>
+; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }>
+; CHECK: %i32_v8f32.vararg = type <{ i32, <8 x float> }>
+; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }>
+; CHECK: %i32_v16f32.vararg = type <{ i32, <16 x float> }>
+; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }>
+; CHECK: %i32_v32f32.vararg = type <{ i32, <32 x float> }>
+; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }>
+; CHECK: %fptr_single_i32.vararg = type <{ i32 }>
+; CHECK: %fptr_libcS.vararg = type <{ %struct.libcS }>
+
+%struct.libcS = type { i8, i16, i32, i64, float, double }
+
+@vararg_ptr = hidden addrspace(1) global ptr @vararg, align 8
+
+define hidden void @copy(ptr noundef %va) {
+; CHECK-LABEL: define {{[^@]+}}@copy(ptr noundef %va) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va.addr = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: %cp = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: %va.addr.ascast = addrspacecast ptr addrspace(5) %va.addr to ptr
+; CHECK-NEXT: %cp.ascast = addrspacecast ptr addrspace(5) %cp to ptr
+; CHECK-NEXT: store ptr %va, ptr addrspace(5) %va.addr, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %cp)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr %cp.ascast, ptr %va.addr.ascast, i32 8, i1 false)
+; CHECK-NEXT: %0 = load ptr, ptr addrspace(5) %cp, align 8
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %cp)
+; CHECK-NEXT: ret void
+;
+entry:
+ %va.addr = alloca ptr, align 8, addrspace(5)
+ %cp = alloca ptr, align 8, addrspace(5)
+ %va.addr.ascast = addrspacecast ptr addrspace(5) %va.addr to ptr
+ %cp.ascast = addrspacecast ptr addrspace(5) %cp to ptr
+ store ptr %va, ptr addrspace(5) %va.addr, align 8
+ call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %cp)
+ call void @llvm.va_copy.p0(ptr %cp.ascast, ptr nonnull %va.addr.ascast)
+ %0 = load ptr, ptr addrspace(5) %cp, align 8
+ call void @valist(ptr noundef %0)
+ call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %cp)
+ ret void
+}
+
+declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare hidden void @valist(ptr noundef)
+
+declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture)
+
+define hidden void @start_once(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_once(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: %s.ascast = addrspacecast ptr addrspace(5) %s to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s)
+; CHECK-NEXT: store ptr %varargs, ptr %s.ascast, align 8
+; CHECK-NEXT: %0 = load ptr, ptr addrspace(5) %s, align 8
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s = alloca ptr, align 8, addrspace(5)
+ %s.ascast = addrspacecast ptr addrspace(5) %s to ptr
+ call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s)
+ call void @llvm.va_start.p0(ptr %s.ascast)
+ %0 = load ptr, ptr addrspace(5) %s, align 8
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s.ascast)
+ call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s)
+ ret void
+}
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+define hidden void @start_twice(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_twice(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s0 = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: %s1 = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: %s0.ascast = addrspacecast ptr addrspace(5) %s0 to ptr
+; CHECK-NEXT: %s1.ascast = addrspacecast ptr addrspace(5) %s1 to ptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s0)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s1)
+; CHECK-NEXT: store ptr %varargs, ptr %s0.ascast, align 8
+; CHECK-NEXT: %0 = load ptr, ptr addrspace(5) %s0, align 8
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: store ptr %varargs, ptr %s1.ascast, align 8
+; CHECK-NEXT: %1 = load ptr, ptr addrspace(5) %s1, align 8
+; CHECK-NEXT: call void @valist(ptr noundef %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s0)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s0 = alloca ptr, align 8, addrspace(5)
+ %s1 = alloca ptr, align 8, addrspace(5)
+ %s0.ascast = addrspacecast ptr addrspace(5) %s0 to ptr
+ %s1.ascast = addrspacecast ptr addrspace(5) %s1 to ptr
+ call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s0)
+ call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s1)
+ call void @llvm.va_start.p0(ptr %s0.ascast)
+ %0 = load ptr, ptr addrspace(5) %s0, align 8
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s0.ascast)
+ call void @llvm.va_start.p0(ptr %s1.ascast)
+ %1 = load ptr, ptr addrspace(5) %s1, align 8
+ call void @valist(ptr noundef %1)
+ call void @llvm.va_end.p0(ptr %s1.ascast)
+ call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s1)
+ call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s0)
+ ret void
+}
+
+define hidden void @single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x)
+ ret void
+}
+
+declare hidden void @vararg(...)
+
+define hidden void @single_double(double noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr addrspace(5) %0, align 8
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x)
+ ret void
+}
+
+define hidden void @single_v4f32(<4 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr addrspace(5) %0, align 16
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v8f32(<8 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 32, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr addrspace(5) %0, align 32
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 32, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v16f32(<16 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 64, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr addrspace(5) %0, align 64
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 64, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v32f32(<32 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v32f32(<32 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 128, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v32f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr addrspace(5) %0, align 128
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 128, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x)
+ ret void
+}
+
+define hidden void @i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 12, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store double %y, ptr addrspace(5) %1, align 8
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 12, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y)
+ ret void
+}
+
+define hidden void @double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 12, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr addrspace(5) %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 12, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_libcS(i32 noundef %x, i8 %y.coerce0, i16 %y.coerce1, i32 %y.coerce2, i64 %y.coerce3, float %y.coerce4, double %y.coerce5) {
+; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, i8 %y.coerce0, i16 %y.coerce1, i32 %y.coerce2, i64 %y.coerce3, float %y.coerce4, double %y.coerce5) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 4, addrspace(5)
+; CHECK-NEXT: %.fca.0.insert = insertvalue %struct.libcS poison, i8 %y.coerce0, 0
+; CHECK-NEXT: %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %y.coerce1, 1
+; CHECK-NEXT: %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %y.coerce2, 2
+; CHECK-NEXT: %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %y.coerce3, 3
+; CHECK-NEXT: %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %y.coerce4, 4
+; CHECK-NEXT: %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %y.coerce5, 5
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store %struct.libcS %.fca.5.insert, ptr addrspace(5) %1, align 8
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %.fca.0.insert = insertvalue %struct.libcS poison, i8 %y.coerce0, 0
+ %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %y.coerce1, 1
+ %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %y.coerce2, 2
+ %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %y.coerce3, 3
+ %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %y.coerce4, 4
+ %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %y.coerce5, 5
+ tail call void (...) @vararg(i32 noundef %x, %struct.libcS %.fca.5.insert)
+ ret void
+}
+
+define hidden void @libcS_i32(i8 %x.coerce0, i16 %x.coerce1, i32 %x.coerce2, i64 %x.coerce3, float %x.coerce4, double %x.coerce5, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@libcS_i32(i8 %x.coerce0, i16 %x.coerce1, i32 %x.coerce2, i64 %x.coerce3, float %x.coerce4, double %x.coerce5, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: %.fca.0.insert = insertvalue %struct.libcS poison, i8 %x.coerce0, 0
+; CHECK-NEXT: %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %x.coerce1, 1
+; CHECK-NEXT: %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %x.coerce2, 2
+; CHECK-NEXT: %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %x.coerce3, 3
+; CHECK-NEXT: %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %x.coerce4, 4
+; CHECK-NEXT: %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %x.coerce5, 5
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store %struct.libcS %.fca.5.insert, ptr addrspace(5) %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %.fca.0.insert = insertvalue %struct.libcS poison, i8 %x.coerce0, 0
+ %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %x.coerce1, 1
+ %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %x.coerce2, 2
+ %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %x.coerce3, 3
+ %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %x.coerce4, 4
+ %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %x.coerce5, 5
+ tail call void (...) @vararg(%struct.libcS %.fca.5.insert, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 20, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store <4 x float> %y, ptr addrspace(5) %1, align 16
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 20, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <4 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 20, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr addrspace(5) %0, align 16
+; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 20, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v8f32(i32 noundef %x, <8 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store <8 x float> %y, ptr addrspace(5) %1, align 32
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <8 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v8f32_i32(<8 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr addrspace(5) %0, align 32
+; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 36, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v16f32(i32 noundef %x, <16 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 68, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store <16 x float> %y, ptr addrspace(5) %1, align 64
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 68, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <16 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v16f32_i32(<16 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 68, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr addrspace(5) %0, align 64
+; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 68, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v32f32(i32 noundef %x, <32 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, <32 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 132, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v32f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store <32 x float> %y, ptr addrspace(5) %1, align 128
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 132, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <32 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v32f32_i32(<32 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(<32 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 132, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v32f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr addrspace(5) %0, align 128
+; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void @vararg(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 132, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @fptr_single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_single_i32.vararg, align 4, addrspace(5)
+; CHECK-NEXT: %0 = load volatile ptr, ptr addrspacecast (ptr addrspace(1) @vararg_ptr to ptr), align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_single_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr addrspace(5) %1, align 4
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void %0(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr addrspacecast (ptr addrspace(1) @vararg_ptr to ptr), align 8
+ tail call void (...) %0(i32 noundef %x)
+ ret void
+}
+
+define hidden void @fptr_libcS(i8 %x.coerce0, i16 %x.coerce1, i32 %x.coerce2, i64 %x.coerce3, float %x.coerce4, double %x.coerce5) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_libcS(i8 %x.coerce0, i16 %x.coerce1, i32 %x.coerce2, i64 %x.coerce3, float %x.coerce4, double %x.coerce5) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_libcS.vararg, align 4, addrspace(5)
+; CHECK-NEXT: %0 = load volatile ptr, ptr addrspacecast (ptr addrspace(1) @vararg_ptr to ptr), align 8
+; CHECK-NEXT: %.fca.0.insert = insertvalue %struct.libcS poison, i8 %x.coerce0, 0
+; CHECK-NEXT: %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %x.coerce1, 1
+; CHECK-NEXT: %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %x.coerce2, 2
+; CHECK-NEXT: %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %x.coerce3, 3
+; CHECK-NEXT: %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %x.coerce4, 4
+; CHECK-NEXT: %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %x.coerce5, 5
+; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 32, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_libcS.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store %struct.libcS %.fca.5.insert, ptr addrspace(5) %1, align 8
+; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr
+; CHECK-NEXT: call void %0(ptr %2)
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 32, ptr addrspace(5) %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr addrspacecast (ptr addrspace(1) @vararg_ptr to ptr), align 8
+ %.fca.0.insert = insertvalue %struct.libcS poison, i8 %x.coerce0, 0
+ %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %x.coerce1, 1
+ %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %x.coerce2, 2
+ %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %x.coerce3, 3
+ %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %x.coerce4, 4
+ %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %x.coerce5, 5
+ tail call void (...) %0(%struct.libcS %.fca.5.insert)
+ ret void
+}
+
+
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 3ec36f03a48a..9ce1ba3316dd 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -497,47 +497,19 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX9-LABEL: v_fmaximum3_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v4, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v5, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v5, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %c, <2 x float> %max0)
@@ -559,47 +531,19 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX9-LABEL: v_fmaximum3_v2f32_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v0, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c)
@@ -621,47 +565,19 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX9-LABEL: v_fmaximum3_v2f32__fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e64 v6, |v1|, |v3|
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v1|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, |v6|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v1|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v6, |v1|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v3|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v3|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, |v2|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v0|, |v2|
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, |v3|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v0|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, |v0|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v2|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v2|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v0, |v4|
-; GFX9-NEXT: v_cndmask_b32_e64 v2, |v4|, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3|
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v2|
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e64 v2, v0, |v4|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4|
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v4|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v4|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v1, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, |v5|, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_max_f32_e64 v2, v1, |v5|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v5|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v5|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
%b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
@@ -686,47 +602,19 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX9-LABEL: v_fmaximum3_v2f32__fneg_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v1, -v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e64 v6, -v1, -v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v1, -v3
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, -v6, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v1, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v6, -v1, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v3, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v3, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v0, -v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v0, -v2
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, -v3, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, -v0, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v2, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v2, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v0, -v4
-; GFX9-NEXT: v_cndmask_b32_e64 v2, -v4, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v2
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e64 v2, v0, -v4
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v4, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v4, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v1, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, -v5, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_max_f32_e64 v2, v1, -v5
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v5, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v5, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x float> %a
%b.fneg = fneg <2 x float> %b
@@ -751,35 +639,19 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX9-LABEL: v_fmaximum3_v2f32__inlineimm1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, 2.0, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v1
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v4, 2.0, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
%max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c)
@@ -801,33 +673,17 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX9-LABEL: v_fmaximum3_v2f32__inlineimm2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 4.0, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 4.0, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -852,67 +708,25 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX9-LABEL: v_fmaximum3_v3f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v10, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v6, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v0, v6, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v6, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v7, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v7, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v7, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v8, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v8, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %c, <3 x float> %max0)
@@ -935,67 +749,25 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX9-LABEL: v_fmaximum3_v3f32_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v10, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v6, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v0, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v1, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v2, v8
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c)
@@ -1018,67 +790,25 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX9-LABEL: v_fmaximum3_v3f32__fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v2|, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e64 v9, |v2|, |v5|
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v2|, |v5|
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, |v9|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v2|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, |v2|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v5|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, |v5|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, |v4|
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v1|, |v4|
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v10, |v5|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v1|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, |v1|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v4|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v4|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v0|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, |v4|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v0|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, |v0|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v3|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v3|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v0, |v6|
-; GFX9-NEXT: v_cndmask_b32_e64 v3, |v6|, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5|
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_max_f32_e64 v5, |v1|, |v4|
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4|
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_max_f32_e64 v4, |v0|, |v3|
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3|
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, v0, |v6|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v6|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v6|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v1, |v7|
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, |v7|, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, v1, |v7|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v7|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v7|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v2, |v8|
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, |v8|, v2, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, v2, |v8|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v8|, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, |v8|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
%b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b)
@@ -1104,67 +834,25 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX9-LABEL: v_fmaximum3_v3f32__fneg_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v2, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e64 v9, -v2, -v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v2, -v5
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, -v9, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v2, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, -v2, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v5, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, -v5, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v1, -v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v1, -v4
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v10, -v5, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v1, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, -v1, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v4, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v4, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v0, -v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v0, -v3
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, -v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, -v0, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v3, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v3, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v0, -v6
-; GFX9-NEXT: v_cndmask_b32_e64 v3, -v6, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_max_f32_e64 v5, -v1, -v4
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_max_f32_e64 v4, -v0, -v3
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, v0, -v6
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v6, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v6, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v1, -v7
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, -v7, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, v1, -v7
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v7, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v7, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v2, -v8
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, -v8, v2, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_max_f32_e64 v3, v2, -v8
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v8, 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, -v8, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x float> %a
%b.fneg = fneg <3 x float> %b
@@ -1190,49 +878,25 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX9-LABEL: v_fmaximum3_v3f32__inlineimm1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
%max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c)
@@ -1255,47 +919,23 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX9-LABEL: v_fmaximum3_v3f32__inlineimm2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e32 v5, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index 0e0b73b88d2d..21074d58bdb7 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -497,47 +497,19 @@ define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX9-LABEL: v_fminimum3_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v4, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v4, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v5, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v5, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %c, <2 x float> %max0)
@@ -559,47 +531,19 @@ define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX9-LABEL: v_fminimum3_v2f32_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v0, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> %c)
@@ -621,47 +565,19 @@ define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX9-LABEL: v_fminimum3_v2f32__fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e64 v6, |v1|, |v3|
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v1|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, |v6|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v1|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v6, |v1|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v3|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v3|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, |v2|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v0|, |v2|
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, |v3|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v0|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, |v0|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v2|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v2|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v0, |v4|
-; GFX9-NEXT: v_cndmask_b32_e64 v2, |v4|, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3|
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, |v0|, |v2|
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2|
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e64 v2, v0, |v4|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4|
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v4|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v4|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, |v5|, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_min_f32_e64 v2, v1, |v5|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v5|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v5|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
%b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
@@ -686,47 +602,19 @@ define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX9-LABEL: v_fminimum3_v2f32__fneg_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v1, -v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e64 v6, -v1, -v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v1, -v3
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, -v6, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v1, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v6, -v1, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v3, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v3, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v0, -v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v0, -v2
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, -v3, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, -v0, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v2, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v2, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v0, -v4
-; GFX9-NEXT: v_cndmask_b32_e64 v2, -v4, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, -v0, -v2
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e64 v2, v0, -v4
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v4, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v4, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, -v5, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX9-NEXT: v_min_f32_e64 v2, v1, -v5
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v5, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v5, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <2 x float> %a
%b.fneg = fneg <2 x float> %b
@@ -751,35 +639,19 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX9-LABEL: v_fminimum3_v2f32__inlineimm1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 2.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v4, 2.0, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v1
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 2.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v4, 2.0, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v1, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
%max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> %c)
@@ -801,33 +673,17 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX9-LABEL: v_fminimum3_v2f32__inlineimm2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 4.0, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 4.0, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -852,67 +708,25 @@ define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX9-LABEL: v_fminimum3_v3f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v10, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v6, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v0, v6, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v6, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v7, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v7, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v7, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v8, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v8, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %c, <3 x float> %max0)
@@ -935,67 +749,25 @@ define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX9-LABEL: v_fminimum3_v3f32_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v9, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v10, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v6, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v0, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v1, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v2, v8
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> %c)
@@ -1018,67 +790,25 @@ define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX9-LABEL: v_fminimum3_v3f32__fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v2|, |v5|
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e64 v9, |v2|, |v5|
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v2|, |v5|
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, |v9|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v2|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, |v2|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v5|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, |v5|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, |v4|
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v1|, |v4|
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v10, |v5|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v1|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, |v1|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v4|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v4|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v0|, |v3|
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, |v4|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v0|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, |v0|, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v3|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v3|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v0, |v6|
-; GFX9-NEXT: v_cndmask_b32_e64 v3, |v6|, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5|
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_min_f32_e64 v5, |v1|, |v4|
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4|
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_min_f32_e64 v4, |v0|, |v3|
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3|
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, v0, |v6|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v6|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v6|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, |v7|
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, |v7|, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, v1, |v7|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v7|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v7|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v2, |v8|
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, |v8|, v2, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, v2, |v8|
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8|
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v8|, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, |v8|, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
%b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b)
@@ -1104,67 +834,25 @@ define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX9-LABEL: v_fminimum3_v3f32__fneg_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v2, -v5
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e64 v9, -v2, -v5
; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v2, -v5
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, -v9, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v2, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, -v2, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v5, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, -v5, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v1, -v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v1, -v4
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v10, -v5, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v1, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, -v1, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v4, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v4, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v0, -v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
-; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v0, -v3
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, -v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, -v0, s[4:5]
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v3, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v3, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v0, -v6
-; GFX9-NEXT: v_cndmask_b32_e64 v3, -v6, v0, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc
+; GFX9-NEXT: v_min_f32_e64 v5, -v1, -v4
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc
+; GFX9-NEXT: v_min_f32_e64 v4, -v0, -v3
+; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, v0, -v6
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v6, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v6, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, -v7
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, -v7, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, v1, -v7
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v7, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v7, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v2, -v8
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v3, -v8, v2, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT: v_min_f32_e64 v3, v2, -v8
; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v8, 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, -v8, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg <3 x float> %a
%b.fneg = fneg <3 x float> %b
@@ -1190,49 +878,25 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX9-LABEL: v_fminimum3_v3f32__inlineimm1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 2.0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v2
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 2.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 2.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
%max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> %c)
@@ -1255,47 +919,23 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX9-LABEL: v_fminimum3_v3f32__inlineimm2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, v2, v5
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e32 v5, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v0, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v1
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v2
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 0db88d1c095d..08cf83fd2bd0 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -31,6 +31,7 @@
; GCN-O0-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O0-NEXT: AMDGPU Printf lowering
; GCN-O0-NEXT: Lower ctors and dtors for AMDGPU
+; GCN-O0-NEXT: Expand variadic functions
; GCN-O0-NEXT: AMDGPU Inline All Functions
; GCN-O0-NEXT: Inliner for always_inline functions
; GCN-O0-NEXT: FunctionPass Manager
@@ -178,6 +179,7 @@
; GCN-O1-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O1-NEXT: AMDGPU Printf lowering
; GCN-O1-NEXT: Lower ctors and dtors for AMDGPU
+; GCN-O1-NEXT: Expand variadic functions
; GCN-O1-NEXT: AMDGPU Inline All Functions
; GCN-O1-NEXT: Inliner for always_inline functions
; GCN-O1-NEXT: FunctionPass Manager
@@ -454,6 +456,7 @@
; GCN-O1-OPTS-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O1-OPTS-NEXT: AMDGPU Printf lowering
; GCN-O1-OPTS-NEXT: Lower ctors and dtors for AMDGPU
+; GCN-O1-OPTS-NEXT: Expand variadic functions
; GCN-O1-OPTS-NEXT: AMDGPU Inline All Functions
; GCN-O1-OPTS-NEXT: Inliner for always_inline functions
; GCN-O1-OPTS-NEXT: FunctionPass Manager
@@ -760,6 +763,7 @@
; GCN-O2-NEXT: Lower ctors and dtors for AMDGPU
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: AMDGPU Image Intrinsic Optimizer
+; GCN-O2-NEXT: Expand variadic functions
; GCN-O2-NEXT: AMDGPU Inline All Functions
; GCN-O2-NEXT: Inliner for always_inline functions
; GCN-O2-NEXT: FunctionPass Manager
@@ -1070,6 +1074,7 @@
; GCN-O3-NEXT: Lower ctors and dtors for AMDGPU
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: AMDGPU Image Intrinsic Optimizer
+; GCN-O3-NEXT: Expand variadic functions
; GCN-O3-NEXT: AMDGPU Inline All Functions
; GCN-O3-NEXT: Inliner for always_inline functions
; GCN-O3-NEXT: FunctionPass Manager
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
index 7d7a46259710..fa7ee9e8d28f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
@@ -554,28 +554,14 @@ define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
+; GFX8-NEXT: v_max_f16_e32 v4, v3, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
+; GFX8-NEXT: v_max_f16_e32 v3, v0, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -654,46 +640,24 @@ define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX7-LABEL: v_maximum_v2f16__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f16__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
-; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f16__nnan:
@@ -759,13 +723,11 @@ define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
+; GFX8-NEXT: v_max_f16_e32 v4, v3, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
+; GFX8-NEXT: v_max_f16_e32 v3, v0, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
@@ -847,34 +809,24 @@ define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
; GFX7-LABEL: v_maximum_v2f16__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f16__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f16__nnan_nsz:
@@ -948,31 +900,15 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
; GFX8-NEXT: s_lshr_b32 s6, s5, 16
; GFX8-NEXT: s_lshr_b32 s7, s4, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s6
-; GFX8-NEXT: v_mov_b32_e32 v1, s7
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, s7, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX8-NEXT: v_max_f16_e32 v1, s7, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s7, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s7, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s6, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v2
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX8-NEXT: v_mov_b32_e32 v1, s5
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, s4, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v1, v2, vcc
+; GFX8-NEXT: v_max_f16_e32 v3, s4, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s4, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
@@ -1216,28 +1152,21 @@ define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
; GFX7-LABEL: v_maximum_v3f16__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f16__nnan:
@@ -1427,28 +1356,21 @@ define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
; GFX7-LABEL: v_maximum_v3f16__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
-; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f16__nnan_nsz:
@@ -1671,35 +1593,26 @@ define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
; GFX7-LABEL: v_maximum_v4f16__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f16__nnan:
@@ -1924,35 +1837,26 @@ define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
; GFX7-LABEL: v_maximum_v4f16__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
+; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
-; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f16__nnan_nsz:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
index 7c5bc7da4df2..f4aa40dbd9bc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
@@ -495,167 +495,73 @@ define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX7-LABEL: v_maximum_v2f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v2, v1, v3
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX8-NEXT: v_max_f32_e32 v2, v1, v3
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX940-NEXT: v_max_f32_e32 v4, v0, v2
; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v4, v0, v2
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v5, v1, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f32:
@@ -676,136 +582,42 @@ define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX7-LABEL: v_maximum_v2f32__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v0, v2
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v2, v1, v3
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f32__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f32__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f32__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX940-NEXT: v_max_f32_e32 v1, v1, v3
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f32__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f32__nnan:
@@ -826,11 +638,11 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX7-LABEL: v_maximum_v2f32__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v2, v1, v3
+; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -838,13 +650,11 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX8-LABEL: v_maximum_v2f32__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX8-NEXT: v_max_f32_e32 v2, v1, v3
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -852,13 +662,11 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX9-LABEL: v_maximum_v2f32__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v2, v1, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -866,16 +674,12 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX940-LABEL: v_maximum_v2f32__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX940-NEXT: v_max_f32_e32 v4, v0, v2
; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_max_f32_e32 v2, v1, v3
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
@@ -884,11 +688,9 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX10-LABEL: v_maximum_v2f32__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v4, v0, v2
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
+; GFX10-NEXT: v_max_f32_e32 v5, v1, v3
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
@@ -897,12 +699,9 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX11-LABEL: v_maximum_v2f32__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
@@ -926,55 +725,42 @@ define <2 x float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX7-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX940-NEXT: v_max_f32_e32 v1, v1, v3
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f32__nnan_nsz:
@@ -996,28 +782,14 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s7
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, s5, v0
+; GFX7-NEXT: v_max_f32_e32 v1, s5, v0
; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s5, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT: v_mov_b32_e32 v3, s5
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s7, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX7-NEXT: v_mov_b32_e32 v0, s6
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, s4, v0
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT: v_max_f32_e32 v3, s4, v0
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s4, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX7-NEXT: v_mov_b32_e32 v3, s4
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v2, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s6, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use v[0:1]
; GFX7-NEXT: ;;#ASMEND
@@ -1027,30 +799,14 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s7
-; GFX8-NEXT: v_mov_b32_e32 v1, s5
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s5, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX8-NEXT: v_max_f32_e32 v1, s5, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s5, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s7, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
; GFX8-NEXT: v_mov_b32_e32 v0, s6
-; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: v_max_f32_e32 v3, s4, v0
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s4, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s6, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
@@ -1060,30 +816,14 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s7
-; GFX9-NEXT: v_mov_b32_e32 v1, s5
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s5, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-NEXT: v_max_f32_e32 v1, s5, v0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s5, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
; GFX9-NEXT: v_mov_b32_e32 v0, s6
-; GFX9-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, s4, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v[0:1]
; GFX9-NEXT: ;;#ASMEND
@@ -1093,40 +833,15 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v0, s3
-; GFX940-NEXT: v_mov_b32_e32 v1, s1
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, s1, v0
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v1, s1, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
; GFX940-NEXT: v_mov_b32_e32 v0, s2
-; GFX940-NEXT: v_mov_b32_e32 v2, s0
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, s0, v0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use v[0:1]
; GFX940-NEXT: ;;#ASMEND
@@ -1135,28 +850,12 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX10-LABEL: s_maximum_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s5
-; GFX10-NEXT: v_cmp_gt_f32_e64 vcc_lo, s5, s7
-; GFX10-NEXT: v_mov_b32_e32 v1, s4
-; GFX10-NEXT: v_cmp_class_f32_e64 s8, s5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, s7, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e64 vcc_lo, s4, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo
+; GFX10-NEXT: v_max_f32_e64 v0, s5, s7
; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s5, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX10-NEXT: v_max_f32_e64 v2, s4, s6
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s4, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v0, s5, s8
-; GFX10-NEXT: v_cmp_class_f32_e64 s5, s4, 64
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v2, s4, s5
-; GFX10-NEXT: v_cmp_class_f32_e64 s4, s7, 64
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s7, s4
-; GFX10-NEXT: v_cmp_class_f32_e64 s4, s6, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s6, s4
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use v[0:1]
; GFX10-NEXT: ;;#ASMEND
@@ -1165,32 +864,13 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX11-LABEL: s_maximum_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
-; GFX11-NEXT: v_cmp_gt_f32_e64 vcc_lo, s1, s3
-; GFX11-NEXT: v_cmp_class_f32_e64 s4, s1, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, s3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e64 vcc_lo, s0, s2
-; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
+; GFX11-NEXT: v_max_f32_e64 v0, s1, s3
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s1, s3
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX11-NEXT: v_max_f32_e64 v2, s0, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v0, s1, s4
-; GFX11-NEXT: v_cmp_class_f32_e64 s1, s0, 64
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v2, s0, s1
-; GFX11-NEXT: v_cmp_class_f32_e64 s0, s3, 64
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s3, s0
-; GFX11-NEXT: v_cmp_class_f32_e64 s0, s2, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s2, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use v[0:1]
; GFX11-NEXT: ;;#ASMEND
@@ -1218,227 +898,92 @@ define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX7-LABEL: v_maximum_v3f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v6, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v1, v4
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v2, v5
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v6, v0, v3
; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX8-NEXT: v_max_f32_e32 v3, v1, v4
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX8-NEXT: v_max_f32_e32 v3, v2, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
+; GFX940-NEXT: v_max_f32_e32 v6, v0, v3
; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, v2, v5
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v6, v0, v3
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v6, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v7, v1, v4
+; GFX10-NEXT: v_max_f32_e32 v8, v2, v5
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v7, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v6, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v7, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f32:
@@ -1460,184 +1005,48 @@ define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX7-LABEL: v_maximum_v3f32__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v6, v0, v3
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v1, v4
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v2, v5
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f32__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX8-NEXT: v_max_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f32__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f32__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX940-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX940-NEXT: v_max_f32_e32 v2, v2, v5
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX10-NEXT: v_max_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f32__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
+; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f32__nnan:
@@ -1659,14 +1068,14 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX7-LABEL: v_maximum_v3f32__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v6, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v1, v4
+; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v2, v5
+; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -1674,17 +1083,14 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX8-LABEL: v_maximum_v3f32__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v6, v0, v3
; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX8-NEXT: v_max_f32_e32 v3, v1, v4
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX8-NEXT: v_max_f32_e32 v3, v2, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -1692,17 +1098,14 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX9-LABEL: v_maximum_v3f32__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v6, v0, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -1710,22 +1113,16 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX940-LABEL: v_maximum_v3f32__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
+; GFX940-NEXT: v_max_f32_e32 v6, v0, v3
; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_max_f32_e32 v3, v1, v4
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v3, v2, v5
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
@@ -1734,13 +1131,10 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX10-LABEL: v_maximum_v3f32__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v6, v0, v3
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
+; GFX10-NEXT: v_max_f32_e32 v7, v1, v4
+; GFX10-NEXT: v_max_f32_e32 v8, v2, v5
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
@@ -1751,17 +1145,14 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX11-LABEL: v_maximum_v3f32__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
@@ -1784,67 +1175,48 @@ define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX7-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v0, v3
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v1, v4
-; GFX7-NEXT: v_max_legacy_f32_e32 v2, v2, v5
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX8-NEXT: v_max_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX940-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX940-NEXT: v_max_f32_e32 v2, v2, v5
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX10-NEXT: v_max_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
+; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f32__nnan_nsz:
@@ -1866,292 +1238,111 @@ define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX7-LABEL: v_maximum_v4f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v1, v5
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v2, v6
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v3, v7
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v0, v4
; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v1, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v2, v6
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v3, v7
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v0, v4
; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v2, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v3, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
+; GFX940-NEXT: v_max_f32_e32 v8, v0, v4
; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v4, v1, v5
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX940-NEXT: v_max_f32_e32 v4, v2, v6
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX940-NEXT: v_max_f32_e32 v4, v3, v7
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v8, v0, v4
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v9, v1, v5
+; GFX10-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v8, v3, v7
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
+; GFX11-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX11-NEXT: v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v10, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f32:
@@ -2174,236 +1365,53 @@ define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX7-LABEL: v_maximum_v4f32__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v0, v4
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v1, v5
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v2, v6
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v3, v7
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f32__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX8-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX8-NEXT: v_max_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f32__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX9-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f32__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX940-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX940-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX940-NEXT: v_max_f32_e32 v3, v3, v7
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v7, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX10-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX10-NEXT: v_max_f32_e32 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f32__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v7, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
+; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f32__nnan:
@@ -2426,17 +1434,17 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX7-LABEL: v_maximum_v4f32__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v4, v3, v7
+; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -2444,21 +1452,17 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX8-LABEL: v_maximum_v4f32__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v0, v4
; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v1, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v2, v6
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX8-NEXT: v_max_f32_e32 v4, v3, v7
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -2466,21 +1470,17 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX9-LABEL: v_maximum_v4f32__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v0, v4
; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v2, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v4, v3, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2488,28 +1488,20 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX940-LABEL: v_maximum_v4f32__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
+; GFX940-NEXT: v_max_f32_e32 v8, v0, v4
; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_max_f32_e32 v4, v1, v5
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v4, v2, v6
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v4, v3, v7
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
@@ -2518,44 +1510,35 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX10-LABEL: v_maximum_v4f32__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v8, v0, v4
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
+; GFX10-NEXT: v_max_f32_e32 v9, v1, v5
+; GFX10-NEXT: v_max_f32_e32 v4, v2, v6
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v7, v3, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v4, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v8, v3, v7
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f32__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v7, v3, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v4, vcc_lo
+; GFX11-NEXT: v_max_f32_e32 v4, v2, v6
+; GFX11-NEXT: v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f32__nsz:
@@ -2578,79 +1561,53 @@ define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX7-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v1, v5
-; GFX7-NEXT: v_max_legacy_f32_e32 v2, v2, v6
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v3, v7
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX8-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX8-NEXT: v_max_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX9-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX940-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX940-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX940-NEXT: v_max_f32_e32 v3, v3, v7
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX10-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX10-NEXT: v_max_f32_e32 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
+; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f32__nnan_nsz:
@@ -2673,551 +1630,185 @@ define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX7-LABEL: v_maximum_v8f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v0, v8
+; GFX7-NEXT: v_max_f32_e32 v16, v0, v8
; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v1, v9
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v1, v9
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v2, v10
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v2, v10
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v3, v11
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v3, v11
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v4, v12
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v4, v12
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v5, v13
+; GFX7-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v5, v13
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v6, v14
+; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v6, v14
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v8, v7, v15
+; GFX7-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX7-NEXT: v_max_f32_e32 v8, v7, v15
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v8f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
+; GFX8-NEXT: v_max_f32_e32 v16, v0, v8
; GFX8-NEXT: v_mov_b32_e32 v17, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v9
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v1, v9
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v10
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v2, v10
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v3, v11
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v3, v11
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v4, v12
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v4, v12
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v5, v13
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v5, v13
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v6, v14
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v6, v14
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v7, v15
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX8-NEXT: v_max_f32_e32 v8, v7, v15
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v8f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
+; GFX9-NEXT: v_max_f32_e32 v16, v0, v8
; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v1, v9
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v2, v10
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v3, v11
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v4, v12
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v5, v13
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v6, v14
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX9-NEXT: v_max_f32_e32 v8, v7, v15
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v8f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v8
+; GFX940-NEXT: v_max_f32_e32 v16, v0, v8
; GFX940-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v9
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v1, v9
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v10
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v2, v10
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v3, v11
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v3, v11
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v4, v12
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v4, v12
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v5, v13
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v5, v13
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v6, v14
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v6, v14
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v7, v15
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX940-NEXT: v_max_f32_e32 v8, v7, v15
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v8f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v16, v0, v8
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v17, v1, v9
+; GFX10-NEXT: v_max_f32_e32 v8, v2, v10
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v17, 0x7fc00000, v17, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v17
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v10
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v11
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v11, v3, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v12
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v12, v4, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v9, v3, v11
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v10, v7, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11
-; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v8, v4, v12
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12
-; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v5, v13
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v6, v14
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v14, v6, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v7, v15
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v9, v5, v13
+; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v8, v6, v14
+; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14
-; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15
-; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v10
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v8f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v16, v0, v8 :: v_dual_max_f32 v17, v1, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v17, 0x7fc00000, v17, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v17
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v10
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v11
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v11, v3, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v12
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v12, v4, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v9, v3, v11 :: v_dual_max_f32 v8, v2, v10
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT: v_max_f32_e32 v10, v7, v15
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11
-; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v8, v4, v12 :: v_dual_cndmask_b32 v3, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12
-; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v5, v13
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v6, v14
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v14, v6, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v7, v15
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v9, v5, v13 :: v_dual_cndmask_b32 v4, 0x7fc00000, v8
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_dual_max_f32 v8, v6, v14 :: v_dual_cndmask_b32 v5, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14
-; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15
-; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v10
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v8f32:
@@ -3244,1071 +1835,371 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-LABEL: v_maximum_v16f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v32, v0, v16
-; GFX7-NEXT: v_mov_b32_e32 v31, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v32, v31, v32, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v16, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v1, v17
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v16
+; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX7-NEXT: v_writelane_b32 v31, s30, 0
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX7-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v2, v18
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v18, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v3, v19
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v19, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v4, v20
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v20, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v5, v21
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v21, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v6, v22
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v22, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v7, v23
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v23, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v8, v24
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v24, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v9, v25
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v25, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v10, v26
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v26, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v11, v27
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v27, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v12, v28
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v28, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v13, v29
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v29, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v14, v30
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v30, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v17
+; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v18
+; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX7-NEXT: v_max_f32_e32 v18, v13, v29
+; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX7-NEXT: v_writelane_b32 v31, s31, 1
+; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v19
+; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX7-NEXT: v_max_f32_e32 v4, v4, v20
+; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX7-NEXT: v_max_f32_e32 v5, v5, v21
+; GFX7-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX7-NEXT: v_max_f32_e32 v6, v6, v22
+; GFX7-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX7-NEXT: v_max_f32_e32 v7, v7, v23
+; GFX7-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX7-NEXT: v_max_f32_e32 v8, v8, v24
+; GFX7-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX7-NEXT: v_max_f32_e32 v9, v9, v25
+; GFX7-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX7-NEXT: v_max_f32_e32 v10, v10, v26
+; GFX7-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX7-NEXT: v_max_f32_e32 v11, v11, v27
+; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX7-NEXT: v_max_f32_e32 v12, v12, v28
+; GFX7-NEXT: v_max_f32_e32 v19, v14, v30
+; GFX7-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX7-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX7-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX7-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX7-NEXT: v_readlane_b32 s31, v31, 1
+; GFX7-NEXT: v_readlane_b32 s30, v31, 0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_max_f32_e32 v18, v15, v16
+; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX7-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v16, v15, v17
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v16f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc
-; GFX8-NEXT: v_mov_b32_e32 v31, 0x7fc00000
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v32, v31, v32, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v16, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v1, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX8-NEXT: v_max_f32_e32 v0, v0, v16
+; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX8-NEXT: v_writelane_b32 v31, s30, 0
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX8-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v2, v18
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v18, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v3, v19
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v19, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v4, v20
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v20, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v5, v21
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v21, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v6, v22
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v22, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v7, v23
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v23, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v8, v24
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v24, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v9, v25
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v25, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v10, v26
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v26, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v11, v27
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v27, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v12, v28
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v28, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v13, v29
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v29, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v14, v30
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v30, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
+; GFX8-NEXT: v_max_f32_e32 v1, v1, v17
+; GFX8-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX8-NEXT: v_max_f32_e32 v2, v2, v18
+; GFX8-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX8-NEXT: v_max_f32_e32 v18, v13, v29
+; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX8-NEXT: v_writelane_b32 v31, s31, 1
+; GFX8-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX8-NEXT: v_max_f32_e32 v3, v3, v19
+; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX8-NEXT: v_max_f32_e32 v4, v4, v20
+; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX8-NEXT: v_max_f32_e32 v5, v5, v21
+; GFX8-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX8-NEXT: v_max_f32_e32 v6, v6, v22
+; GFX8-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX8-NEXT: v_max_f32_e32 v7, v7, v23
+; GFX8-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX8-NEXT: v_max_f32_e32 v8, v8, v24
+; GFX8-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX8-NEXT: v_max_f32_e32 v9, v9, v25
+; GFX8-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX8-NEXT: v_max_f32_e32 v10, v10, v26
+; GFX8-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX8-NEXT: v_max_f32_e32 v11, v11, v27
+; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX8-NEXT: v_max_f32_e32 v12, v12, v28
+; GFX8-NEXT: v_max_f32_e32 v19, v14, v30
+; GFX8-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX8-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX8-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX8-NEXT: v_readlane_b32 s31, v31, 1
+; GFX8-NEXT: v_readlane_b32 s30, v31, 0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_max_f32_e32 v18, v15, v16
+; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX8-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v15, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v16f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc
-; GFX9-NEXT: v_mov_b32_e32 v31, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v32, v31, v32, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v16, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v16
+; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX9-NEXT: v_writelane_b32 v31, s30, 0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v18
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v18, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v19
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v19, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v4, v20
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v20, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v5, v21
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v21, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v6, v22
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v22, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v7, v23
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v23, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v8, v24
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v24, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v9, v25
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v25, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v10, v26
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v26, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v11, v27
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v27, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v12, v28
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v28, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v13, v29
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v29, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v14, v30
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v30, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v17
+; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX9-NEXT: v_max_f32_e32 v2, v2, v18
+; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX9-NEXT: v_max_f32_e32 v18, v13, v29
+; GFX9-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX9-NEXT: v_writelane_b32 v31, s31, 1
+; GFX9-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX9-NEXT: v_max_f32_e32 v3, v3, v19
+; GFX9-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX9-NEXT: v_max_f32_e32 v4, v4, v20
+; GFX9-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX9-NEXT: v_max_f32_e32 v5, v5, v21
+; GFX9-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX9-NEXT: v_max_f32_e32 v6, v6, v22
+; GFX9-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX9-NEXT: v_max_f32_e32 v7, v7, v23
+; GFX9-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX9-NEXT: v_max_f32_e32 v8, v8, v24
+; GFX9-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX9-NEXT: v_max_f32_e32 v9, v9, v25
+; GFX9-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX9-NEXT: v_max_f32_e32 v10, v10, v26
+; GFX9-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX9-NEXT: v_max_f32_e32 v11, v11, v27
+; GFX9-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX9-NEXT: v_max_f32_e32 v12, v12, v28
+; GFX9-NEXT: v_max_f32_e32 v19, v14, v30
+; GFX9-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX9-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX9-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX9-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX9-NEXT: v_readlane_b32 s31, v31, 1
+; GFX9-NEXT: v_readlane_b32 s30, v31, 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_max_f32_e32 v18, v15, v16
+; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v15, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v16f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v0, v16
; GFX940-NEXT: v_mov_b32_e32 v32, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v16, v0, vcc
+; GFX940-NEXT: v_max_f32_e32 v33, v0, v16
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v32, v33, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v16, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v33
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v1, v17
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX940-NEXT: v_max_f32_e32 v34, v1, v17
+; GFX940-NEXT: v_max_f32_e32 v35, v2, v18
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v17, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v2, v18
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX940-NEXT: v_max_f32_e32 v36, v3, v19
+; GFX940-NEXT: v_max_f32_e32 v37, v4, v20
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v18, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v3, v19
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
+; GFX940-NEXT: v_max_f32_e32 v38, v5, v21
+; GFX940-NEXT: v_max_f32_e32 v39, v6, v22
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v19, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v4, v20
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
+; GFX940-NEXT: v_max_f32_e32 v48, v7, v23
+; GFX940-NEXT: v_max_f32_e32 v49, v8, v24
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v20, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v5, v21
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
+; GFX940-NEXT: v_max_f32_e32 v50, v9, v25
+; GFX940-NEXT: v_max_f32_e32 v51, v10, v26
+; GFX940-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v21, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v6, v22
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
+; GFX940-NEXT: v_max_f32_e32 v52, v11, v27
+; GFX940-NEXT: v_max_f32_e32 v53, v12, v28
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v22, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v7, v23
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
+; GFX940-NEXT: v_max_f32_e32 v54, v13, v29
+; GFX940-NEXT: v_max_f32_e32 v55, v14, v30
+; GFX940-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v23, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v8, v24
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_max_f32_e32 v16, v15, v31
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v8, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v24, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v9, v25
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v9, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v25, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v10, v26
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v10, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v26, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v11, v27
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v11, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v27, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v12, v28
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v12, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v28, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v13, v29
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v13, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v29, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v14, v30
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v14, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v30, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f32_e32 vcc, v15, v31
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v15, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v31, 64
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v16f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v16
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v17
-; GFX10-NEXT: v_cndmask_b32_e32 v33, v17, v1, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v18
-; GFX10-NEXT: v_cndmask_b32_e32 v34, v18, v2, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v19
-; GFX10-NEXT: v_cndmask_b32_e32 v35, v19, v3, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v20
-; GFX10-NEXT: v_cndmask_b32_e32 v36, v20, v4, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v5, v21
-; GFX10-NEXT: v_cndmask_b32_e32 v37, v21, v5, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v6, v22
-; GFX10-NEXT: v_cndmask_b32_e32 v38, v22, v6, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v7, v23
-; GFX10-NEXT: v_cndmask_b32_e32 v39, v23, v7, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v8, v24
-; GFX10-NEXT: v_cndmask_b32_e32 v48, v24, v8, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v9, v25
-; GFX10-NEXT: v_cndmask_b32_e32 v49, v25, v9, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v10, v26
-; GFX10-NEXT: v_cndmask_b32_e32 v50, v26, v10, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v11, v27
-; GFX10-NEXT: v_cndmask_b32_e32 v51, v27, v11, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v12, v28
-; GFX10-NEXT: v_cndmask_b32_e32 v52, v28, v12, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v13, v29
-; GFX10-NEXT: v_cndmask_b32_e32 v53, v29, v13, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v14, v30
-; GFX10-NEXT: v_cndmask_b32_e32 v54, v30, v14, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v32, v0, v16
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v32, 0x7fc00000, v32, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v33, v1, v17
+; GFX10-NEXT: v_max_f32_e32 v34, v2, v18
+; GFX10-NEXT: v_max_f32_e32 v35, v3, v19
+; GFX10-NEXT: v_max_f32_e32 v36, v4, v20
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17
-; GFX10-NEXT: v_cndmask_b32_e32 v33, 0x7fc00000, v33, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v37, v5, v21
+; GFX10-NEXT: v_max_f32_e32 v38, v6, v22
+; GFX10-NEXT: v_max_f32_e32 v39, v7, v23
+; GFX10-NEXT: v_max_f32_e32 v48, v8, v24
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v18
-; GFX10-NEXT: v_cndmask_b32_e32 v34, 0x7fc00000, v34, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v49, v9, v25
+; GFX10-NEXT: v_max_f32_e32 v50, v10, v26
+; GFX10-NEXT: v_max_f32_e32 v51, v11, v27
+; GFX10-NEXT: v_max_f32_e32 v52, v12, v28
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19
-; GFX10-NEXT: v_cndmask_b32_e32 v35, 0x7fc00000, v35, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v53, v13, v29
+; GFX10-NEXT: v_max_f32_e32 v54, v14, v30
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20
-; GFX10-NEXT: v_cndmask_b32_e32 v36, 0x7fc00000, v36, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21
-; GFX10-NEXT: v_cndmask_b32_e32 v37, 0x7fc00000, v37, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22
-; GFX10-NEXT: v_cndmask_b32_e32 v38, 0x7fc00000, v38, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23
-; GFX10-NEXT: v_cndmask_b32_e32 v39, 0x7fc00000, v39, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24
-; GFX10-NEXT: v_cndmask_b32_e32 v48, 0x7fc00000, v48, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25
-; GFX10-NEXT: v_cndmask_b32_e32 v49, 0x7fc00000, v49, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26
-; GFX10-NEXT: v_cndmask_b32_e32 v50, 0x7fc00000, v50, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27
-; GFX10-NEXT: v_cndmask_b32_e32 v51, 0x7fc00000, v51, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28
-; GFX10-NEXT: v_cndmask_b32_e32 v52, 0x7fc00000, v52, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29
-; GFX10-NEXT: v_cndmask_b32_e32 v53, 0x7fc00000, v53, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30
-; GFX10-NEXT: v_cndmask_b32_e32 v54, 0x7fc00000, v54, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v16, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v17, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v18, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v19, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v20, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v21, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v22, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v23, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v24, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v25, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v26, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v27, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v28, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v29, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v30, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v33
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v34
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v35
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v36
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v37
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v38
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v15, v31
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v39
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v48
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
+; GFX10-NEXT: v_max_f32_e32 v16, v15, v31
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31
-; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v49
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v50
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v51
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v52
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v31, 64
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v53
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v54
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v16f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v16
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v17
-; GFX11-NEXT: v_cndmask_b32_e32 v33, v17, v1, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v2, v18
-; GFX11-NEXT: v_cndmask_b32_e32 v34, v18, v2, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v19
-; GFX11-NEXT: v_cndmask_b32_e32 v35, v19, v3, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v20
-; GFX11-NEXT: v_cndmask_b32_e32 v36, v20, v4, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v5, v21
-; GFX11-NEXT: v_cndmask_b32_e32 v37, v21, v5, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v6, v22
-; GFX11-NEXT: v_cndmask_b32_e32 v38, v22, v6, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v7, v23
-; GFX11-NEXT: v_cndmask_b32_e32 v39, v23, v7, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v8, v24
-; GFX11-NEXT: v_cndmask_b32_e32 v48, v24, v8, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v9, v25
-; GFX11-NEXT: v_cndmask_b32_e32 v49, v25, v9, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v10, v26
-; GFX11-NEXT: v_cndmask_b32_e32 v50, v26, v10, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v11, v27
-; GFX11-NEXT: v_cndmask_b32_e32 v51, v27, v11, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v12, v28
-; GFX11-NEXT: v_cndmask_b32_e32 v52, v28, v12, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v13, v29
-; GFX11-NEXT: v_cndmask_b32_e32 v53, v29, v13, vcc_lo
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v14, v30
-; GFX11-NEXT: v_cndmask_b32_e32 v54, v30, v14, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v32, v0, v16 :: v_dual_max_f32 v33, v1, v17
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16
-; GFX11-NEXT: v_cndmask_b32_e32 v32, 0x7fc00000, v32, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v34, v2, v18 :: v_dual_max_f32 v35, v3, v19
+; GFX11-NEXT: v_dual_max_f32 v36, v4, v20 :: v_dual_max_f32 v37, v5, v21
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17
-; GFX11-NEXT: v_cndmask_b32_e32 v33, 0x7fc00000, v33, vcc_lo
+; GFX11-NEXT: v_max_f32_e32 v54, v14, v30
+; GFX11-NEXT: v_dual_max_f32 v38, v6, v22 :: v_dual_max_f32 v39, v7, v23
+; GFX11-NEXT: v_dual_max_f32 v48, v8, v24 :: v_dual_max_f32 v49, v9, v25
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v18
-; GFX11-NEXT: v_cndmask_b32_e32 v34, 0x7fc00000, v34, vcc_lo
+; GFX11-NEXT: v_dual_max_f32 v50, v10, v26 :: v_dual_max_f32 v51, v11, v27
+; GFX11-NEXT: v_dual_max_f32 v52, v12, v28 :: v_dual_max_f32 v53, v13, v29
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19
-; GFX11-NEXT: v_cndmask_b32_e32 v35, 0x7fc00000, v35, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20
-; GFX11-NEXT: v_cndmask_b32_e32 v36, 0x7fc00000, v36, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21
-; GFX11-NEXT: v_cndmask_b32_e32 v37, 0x7fc00000, v37, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22
-; GFX11-NEXT: v_cndmask_b32_e32 v38, 0x7fc00000, v38, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23
-; GFX11-NEXT: v_cndmask_b32_e32 v39, 0x7fc00000, v39, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24
-; GFX11-NEXT: v_cndmask_b32_e32 v48, 0x7fc00000, v48, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25
-; GFX11-NEXT: v_cndmask_b32_e32 v49, 0x7fc00000, v49, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26
-; GFX11-NEXT: v_cndmask_b32_e32 v50, 0x7fc00000, v50, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27
-; GFX11-NEXT: v_cndmask_b32_e32 v51, 0x7fc00000, v51, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28
-; GFX11-NEXT: v_cndmask_b32_e32 v52, 0x7fc00000, v52, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29
-; GFX11-NEXT: v_cndmask_b32_e32 v53, 0x7fc00000, v53, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30
-; GFX11-NEXT: v_cndmask_b32_e32 v54, 0x7fc00000, v54, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v16, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v17, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v18, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v19, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v20, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v21, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v22, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v23, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v24, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v25, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v26, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v27, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v28, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v29, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v30, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v33
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v34
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v35
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v36
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v37
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v38
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, v15, v31
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v39
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v48
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
+; GFX11-NEXT: v_max_f32_e32 v16, v15, v31
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31
-; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v49
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v50
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v51
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v52
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v31, 64
-; GFX11-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v53
-; GFX11-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v54
-; GFX11-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX11-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v16f32:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
index d60a28e74043..78fb23182f80 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
@@ -530,221 +530,86 @@ define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX7-LABEL: v_maximum_v2f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX7-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[0:1], 64
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[4:5], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v7, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v10, v11, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, v4, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX7-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX8-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[0:1], 64
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[4:5], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v7, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v10, v11, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v4, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX8-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX9-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[0:1], 64
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[4:5], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v7, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v10, v11, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX9-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
+; GFX940-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX940-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[4:5], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v10, v4, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[4:5]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[6:7]
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v7, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v6, v2, s4
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v8, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v10, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v12, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v13, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[6:7], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s7, 0, v[8:9]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s8, 0, v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, v0, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, v1, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s8
+; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[2:3], v[6:7]
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v7, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v6, v2, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v8, s1
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v10, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v13, s2
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[6:7], 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s4, 0, v[10:11]
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v12, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v6, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s3, 0, v[8:9]
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v5, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, v0, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, v1, s3
+; GFX11-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f64:
@@ -765,182 +630,43 @@ define <2 x double> @v_maximum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX7-LABEL: v_maximum_v2f64__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 64
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[6:7], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v7, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v6, v2, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f64__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 64
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[6:7], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v7, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v6, v2, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f64__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 64
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[6:7], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v7, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v6, v2, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f64__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[4:5], 64
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 64
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1]
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[4:5]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[0:1]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[2:3]
+; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[6:7], 64
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v7, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v6, v2, s4
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s7, 0, v[8:9]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s8, 0, v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, v0, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, v1, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s8
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f64__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[6:7], 64
-; GFX11-NEXT: v_dual_cndmask_b32 v9, v5, v1 :: v_dual_cndmask_b32 v8, v4, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v7, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v6, v2, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s3, 0, v[8:9]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s4, 0, v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v8, v0 :: v_dual_cndmask_b32 v1, v9, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v6, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, v0, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, v1, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f64__nnan:
@@ -961,111 +687,86 @@ define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX7-LABEL: v_maximum_v2f64__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[2:3], v[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v6, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[8:9]
+; GFX7-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f64__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[2:3], v[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v6, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[8:9]
+; GFX8-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f64__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v6, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[8:9]
+; GFX9-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f64__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v2, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX940-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX940-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v2, 0, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[6:7]
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v6, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v8, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v9, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s6
+; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f64__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[2:3], v[6:7]
-; GFX11-NEXT: v_dual_cndmask_b32 v8, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
+; GFX11-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v6, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, v8, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v9, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f64__nsz:
@@ -1086,69 +787,43 @@ define <2 x double> @v_maximum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX7-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v2f64__nnan_nsz:
@@ -1170,61 +845,20 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s10
+; GFX7-NEXT: v_mov_b32_e32 v4, s8
; GFX7-NEXT: v_mov_b32_e32 v1, s11
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[0:1]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], s[6:7], v[0:1]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[18:19], s[10:11], 64
-; GFX7-NEXT: v_mov_b32_e32 v0, s8
-; GFX7-NEXT: v_mov_b32_e32 v1, s9
-; GFX7-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s16, s7, s11
-; GFX7-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s15, s16, 0x7ff80000
-; GFX7-NEXT: s_and_b64 s[16:17], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s14, s6, s10
-; GFX7-NEXT: v_cmp_class_f64_e64 s[16:17], s[6:7], 64
-; GFX7-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s14, s14, 0
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[20:21], s[14:15], 0
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX7-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX7-NEXT: s_cselect_b32 s7, s7, s15
-; GFX7-NEXT: s_and_b64 s[12:13], s[18:19], exec
-; GFX7-NEXT: s_cselect_b32 s7, s11, s7
-; GFX7-NEXT: s_and_b64 s[12:13], s[20:21], exec
-; GFX7-NEXT: s_cselect_b32 s7, s7, s15
-; GFX7-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], s[4:5], v[0:1]
-; GFX7-NEXT: s_cselect_b32 s6, s6, s14
-; GFX7-NEXT: s_and_b64 s[16:17], s[18:19], exec
-; GFX7-NEXT: s_cselect_b32 s6, s10, s6
-; GFX7-NEXT: s_and_b64 s[10:11], s[20:21], exec
-; GFX7-NEXT: s_cselect_b32 s6, s6, s14
-; GFX7-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s14, s5, s9
-; GFX7-NEXT: s_and_b64 s[10:11], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s11, s14, 0x7ff80000
-; GFX7-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s10, s4, s8
-; GFX7-NEXT: v_cmp_class_f64_e64 s[14:15], s[4:5], 64
-; GFX7-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX7-NEXT: v_cmp_class_f64_e64 s[12:13], s[8:9], 64
-; GFX7-NEXT: s_cselect_b32 s10, s10, 0
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[16:17], s[10:11], 0
-; GFX7-NEXT: s_and_b64 s[18:19], s[14:15], exec
-; GFX7-NEXT: s_cselect_b32 s5, s5, s11
-; GFX7-NEXT: s_and_b64 s[18:19], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s5, s9, s5
-; GFX7-NEXT: s_and_b64 s[18:19], s[16:17], exec
-; GFX7-NEXT: s_cselect_b32 s5, s5, s11
-; GFX7-NEXT: s_and_b64 s[14:15], s[14:15], exec
-; GFX7-NEXT: s_cselect_b32 s4, s4, s10
-; GFX7-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s4, s8, s4
-; GFX7-NEXT: s_and_b64 s[8:9], s[16:17], exec
-; GFX7-NEXT: s_cselect_b32 s4, s4, s10
+; GFX7-NEXT: v_mov_b32_e32 v5, s9
+; GFX7-NEXT: v_max_f64 v[2:3], s[6:7], v[0:1]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
+; GFX7-NEXT: v_max_f64 v[0:1], s[4:5], v[4:5]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], s[4:5], v[4:5]
+; GFX7-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
; GFX7-NEXT: ;;#ASMSTART
-; GFX7-NEXT: ; use s[4:7]
+; GFX7-NEXT: ; use v[0:3]
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
@@ -1232,61 +866,20 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s10
+; GFX8-NEXT: v_mov_b32_e32 v4, s8
; GFX8-NEXT: v_mov_b32_e32 v1, s11
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[0:1]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], s[6:7], v[0:1]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[18:19], s[10:11], 64
-; GFX8-NEXT: v_mov_b32_e32 v0, s8
-; GFX8-NEXT: v_mov_b32_e32 v1, s9
-; GFX8-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s16, s7, s11
-; GFX8-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s15, s16, 0x7ff80000
-; GFX8-NEXT: s_and_b64 s[16:17], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s14, s6, s10
-; GFX8-NEXT: v_cmp_class_f64_e64 s[16:17], s[6:7], 64
-; GFX8-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s14, s14, 0
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[20:21], s[14:15], 0
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX8-NEXT: s_cselect_b32 s7, s7, s15
-; GFX8-NEXT: s_and_b64 s[12:13], s[18:19], exec
-; GFX8-NEXT: s_cselect_b32 s7, s11, s7
-; GFX8-NEXT: s_and_b64 s[12:13], s[20:21], exec
-; GFX8-NEXT: s_cselect_b32 s7, s7, s15
-; GFX8-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], s[4:5], v[0:1]
-; GFX8-NEXT: s_cselect_b32 s6, s6, s14
-; GFX8-NEXT: s_and_b64 s[16:17], s[18:19], exec
-; GFX8-NEXT: s_cselect_b32 s6, s10, s6
-; GFX8-NEXT: s_and_b64 s[10:11], s[20:21], exec
-; GFX8-NEXT: s_cselect_b32 s6, s6, s14
-; GFX8-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s14, s5, s9
-; GFX8-NEXT: s_and_b64 s[10:11], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s11, s14, 0x7ff80000
-; GFX8-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s10, s4, s8
-; GFX8-NEXT: v_cmp_class_f64_e64 s[14:15], s[4:5], 64
-; GFX8-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX8-NEXT: v_cmp_class_f64_e64 s[12:13], s[8:9], 64
-; GFX8-NEXT: s_cselect_b32 s10, s10, 0
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[16:17], s[10:11], 0
-; GFX8-NEXT: s_and_b64 s[18:19], s[14:15], exec
-; GFX8-NEXT: s_cselect_b32 s5, s5, s11
-; GFX8-NEXT: s_and_b64 s[18:19], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s5, s9, s5
-; GFX8-NEXT: s_and_b64 s[18:19], s[16:17], exec
-; GFX8-NEXT: s_cselect_b32 s5, s5, s11
-; GFX8-NEXT: s_and_b64 s[14:15], s[14:15], exec
-; GFX8-NEXT: s_cselect_b32 s4, s4, s10
-; GFX8-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s4, s8, s4
-; GFX8-NEXT: s_and_b64 s[8:9], s[16:17], exec
-; GFX8-NEXT: s_cselect_b32 s4, s4, s10
+; GFX8-NEXT: v_mov_b32_e32 v5, s9
+; GFX8-NEXT: v_max_f64 v[2:3], s[6:7], v[0:1]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
+; GFX8-NEXT: v_max_f64 v[0:1], s[4:5], v[4:5]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], s[4:5], v[4:5]
+; GFX8-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s[4:7]
+; GFX8-NEXT: ; use v[0:3]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -1294,61 +887,20 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s10
+; GFX9-NEXT: v_mov_b32_e32 v4, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s11
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[0:1]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], s[6:7], v[0:1]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[18:19], s[10:11], 64
-; GFX9-NEXT: v_mov_b32_e32 v0, s8
-; GFX9-NEXT: v_mov_b32_e32 v1, s9
-; GFX9-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s16, s7, s11
-; GFX9-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s15, s16, 0x7ff80000
-; GFX9-NEXT: s_and_b64 s[16:17], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s14, s6, s10
-; GFX9-NEXT: v_cmp_class_f64_e64 s[16:17], s[6:7], 64
-; GFX9-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s14, s14, 0
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[20:21], s[14:15], 0
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX9-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX9-NEXT: s_cselect_b32 s7, s7, s15
-; GFX9-NEXT: s_and_b64 s[12:13], s[18:19], exec
-; GFX9-NEXT: s_cselect_b32 s7, s11, s7
-; GFX9-NEXT: s_and_b64 s[12:13], s[20:21], exec
-; GFX9-NEXT: s_cselect_b32 s7, s7, s15
-; GFX9-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], s[4:5], v[0:1]
-; GFX9-NEXT: s_cselect_b32 s6, s6, s14
-; GFX9-NEXT: s_and_b64 s[16:17], s[18:19], exec
-; GFX9-NEXT: s_cselect_b32 s6, s10, s6
-; GFX9-NEXT: s_and_b64 s[10:11], s[20:21], exec
-; GFX9-NEXT: s_cselect_b32 s6, s6, s14
-; GFX9-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s14, s5, s9
-; GFX9-NEXT: s_and_b64 s[10:11], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s11, s14, 0x7ff80000
-; GFX9-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s10, s4, s8
-; GFX9-NEXT: v_cmp_class_f64_e64 s[14:15], s[4:5], 64
-; GFX9-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX9-NEXT: v_cmp_class_f64_e64 s[12:13], s[8:9], 64
-; GFX9-NEXT: s_cselect_b32 s10, s10, 0
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[16:17], s[10:11], 0
-; GFX9-NEXT: s_and_b64 s[18:19], s[14:15], exec
-; GFX9-NEXT: s_cselect_b32 s5, s5, s11
-; GFX9-NEXT: s_and_b64 s[18:19], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s5, s9, s5
-; GFX9-NEXT: s_and_b64 s[18:19], s[16:17], exec
-; GFX9-NEXT: s_cselect_b32 s5, s5, s11
-; GFX9-NEXT: s_and_b64 s[14:15], s[14:15], exec
-; GFX9-NEXT: s_cselect_b32 s4, s4, s10
-; GFX9-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s4, s8, s4
-; GFX9-NEXT: s_and_b64 s[8:9], s[16:17], exec
-; GFX9-NEXT: s_cselect_b32 s4, s4, s10
+; GFX9-NEXT: v_mov_b32_e32 v5, s9
+; GFX9-NEXT: v_max_f64 v[2:3], s[6:7], v[0:1]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
+; GFX9-NEXT: v_max_f64 v[0:1], s[4:5], v[4:5]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], s[4:5], v[4:5]
+; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[4:7]
+; GFX9-NEXT: ; use v[0:3]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@@ -1356,179 +908,52 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, s[2:3], v[0:1]
-; GFX940-NEXT: s_and_b64 s[8:9], vcc, exec
-; GFX940-NEXT: v_cmp_o_f64_e64 s[8:9], s[2:3], v[0:1]
-; GFX940-NEXT: s_cselect_b32 s12, s3, s7
-; GFX940-NEXT: s_and_b64 s[10:11], s[8:9], exec
-; GFX940-NEXT: s_cselect_b32 s11, s12, 0x7ff80000
-; GFX940-NEXT: s_and_b64 s[12:13], vcc, exec
-; GFX940-NEXT: s_cselect_b32 s10, s2, s6
-; GFX940-NEXT: s_and_b64 s[8:9], s[8:9], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[12:13], s[2:3], 64
-; GFX940-NEXT: s_cselect_b32 s10, s10, 0
-; GFX940-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[14:15], s[6:7], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[8:9], s[10:11], 0
-; GFX940-NEXT: s_cselect_b32 s3, s3, s11
-; GFX940-NEXT: s_and_b64 s[16:17], s[14:15], exec
-; GFX940-NEXT: s_cselect_b32 s3, s7, s3
-; GFX940-NEXT: s_and_b64 s[16:17], s[8:9], exec
-; GFX940-NEXT: s_cselect_b32 s7, s3, s11
-; GFX940-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX940-NEXT: s_cselect_b32 s11, s2, s10
-; GFX940-NEXT: s_and_b64 s[2:3], s[14:15], exec
+; GFX940-NEXT: v_max_f64 v[2:3], s[2:3], v[0:1]
+; GFX940-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
-; GFX940-NEXT: s_cselect_b32 s6, s6, s11
-; GFX940-NEXT: s_and_b64 s[2:3], s[8:9], exec
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX940-NEXT: s_cselect_b32 s6, s6, s10
-; GFX940-NEXT: s_and_b64 s[2:3], vcc, exec
-; GFX940-NEXT: v_cmp_o_f64_e64 s[2:3], s[0:1], v[0:1]
-; GFX940-NEXT: s_cselect_b32 s10, s1, s5
-; GFX940-NEXT: s_and_b64 s[8:9], s[2:3], exec
-; GFX940-NEXT: s_cselect_b32 s9, s10, 0x7ff80000
-; GFX940-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX940-NEXT: s_cselect_b32 s8, s0, s4
-; GFX940-NEXT: s_and_b64 s[2:3], s[2:3], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[10:11], s[0:1], 64
-; GFX940-NEXT: s_cselect_b32 s8, s8, 0
-; GFX940-NEXT: s_and_b64 s[12:13], s[10:11], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[12:13], s[4:5], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], s[8:9], 0
-; GFX940-NEXT: s_cselect_b32 s1, s1, s9
-; GFX940-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX940-NEXT: s_cselect_b32 s1, s5, s1
-; GFX940-NEXT: s_and_b64 s[14:15], s[2:3], exec
-; GFX940-NEXT: s_cselect_b32 s5, s1, s9
-; GFX940-NEXT: s_and_b64 s[10:11], s[10:11], exec
-; GFX940-NEXT: s_cselect_b32 s9, s0, s8
-; GFX940-NEXT: s_and_b64 s[0:1], s[12:13], exec
-; GFX940-NEXT: s_cselect_b32 s4, s4, s9
-; GFX940-NEXT: s_and_b64 s[0:1], s[2:3], exec
-; GFX940-NEXT: s_cselect_b32 s4, s4, s8
+; GFX940-NEXT: v_max_f64 v[4:5], s[0:1], v[0:1]
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use s[4:7]
+; GFX940-NEXT: ; use v[0:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_maximum_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s12, s[6:7], s[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s14, s[6:7], s[10:11]
-; GFX10-NEXT: v_cmp_class_f64_e64 s15, s[6:7], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s16, s[10:11], 64
-; GFX10-NEXT: v_cmp_o_f64_e64 s18, s[4:5], s[8:9]
-; GFX10-NEXT: v_cmp_class_f64_e64 s19, s[4:5], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s20, s[8:9], 64
-; GFX10-NEXT: s_and_b32 s13, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s13, s7, s11
-; GFX10-NEXT: s_and_b32 s17, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s13, s13, 0x7ff80000
-; GFX10-NEXT: s_and_b32 s12, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s12, s6, s10
-; GFX10-NEXT: s_and_b32 s14, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s12, s12, 0
-; GFX10-NEXT: v_cmp_gt_f64_e64 s17, s[4:5], s[8:9]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, s[12:13], 0
-; GFX10-NEXT: s_and_b32 s21, s15, exec_lo
-; GFX10-NEXT: s_cselect_b32 s7, s7, s13
-; GFX10-NEXT: s_and_b32 s21, s16, exec_lo
-; GFX10-NEXT: s_cselect_b32 s7, s11, s7
-; GFX10-NEXT: s_and_b32 s11, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s7, s7, s13
-; GFX10-NEXT: s_and_b32 s11, s15, exec_lo
-; GFX10-NEXT: s_cselect_b32 s6, s6, s12
-; GFX10-NEXT: s_and_b32 s11, s16, exec_lo
-; GFX10-NEXT: s_cselect_b32 s6, s10, s6
-; GFX10-NEXT: s_and_b32 s10, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s6, s6, s12
-; GFX10-NEXT: s_and_b32 s10, s17, exec_lo
-; GFX10-NEXT: s_cselect_b32 s10, s5, s9
-; GFX10-NEXT: s_and_b32 s11, s18, exec_lo
-; GFX10-NEXT: s_cselect_b32 s11, s10, 0x7ff80000
-; GFX10-NEXT: s_and_b32 s10, s17, exec_lo
-; GFX10-NEXT: s_cselect_b32 s10, s4, s8
-; GFX10-NEXT: s_and_b32 s12, s18, exec_lo
-; GFX10-NEXT: s_cselect_b32 s10, s10, 0
-; GFX10-NEXT: s_and_b32 s13, s19, exec_lo
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, s[10:11], 0
-; GFX10-NEXT: s_cselect_b32 s5, s5, s11
-; GFX10-NEXT: s_and_b32 s13, s20, exec_lo
-; GFX10-NEXT: s_cselect_b32 s5, s9, s5
-; GFX10-NEXT: s_and_b32 s9, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s5, s5, s11
-; GFX10-NEXT: s_and_b32 s9, s19, exec_lo
-; GFX10-NEXT: s_cselect_b32 s4, s4, s10
-; GFX10-NEXT: s_and_b32 s9, s20, exec_lo
-; GFX10-NEXT: s_cselect_b32 s4, s8, s4
-; GFX10-NEXT: s_and_b32 s8, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s4, s4, s10
+; GFX10-NEXT: v_max_f64 v[0:1], s[6:7], s[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, s[6:7], s[10:11]
+; GFX10-NEXT: v_max_f64 v[4:5], s[4:5], s[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, s[4:5], s[8:9]
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v1, 0x7ff80000, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v0, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, s4
; GFX10-NEXT: ;;#ASMSTART
-; GFX10-NEXT: ; use s[4:7]
+; GFX10-NEXT: ; use v[0:3]
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_maximum_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s8, s[2:3], s[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s10, s[2:3], s[6:7]
-; GFX11-NEXT: v_cmp_class_f64_e64 s11, s[2:3], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s12, s[6:7], 64
-; GFX11-NEXT: v_cmp_o_f64_e64 s14, s[0:1], s[4:5]
-; GFX11-NEXT: v_cmp_class_f64_e64 s15, s[0:1], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s16, s[4:5], 64
-; GFX11-NEXT: s_and_b32 s9, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s9, s3, s7
-; GFX11-NEXT: s_and_b32 s13, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s9, s9, 0x7ff80000
-; GFX11-NEXT: s_and_b32 s8, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s8, s2, s6
-; GFX11-NEXT: s_and_b32 s10, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s8, s8, 0
-; GFX11-NEXT: v_cmp_gt_f64_e64 s13, s[0:1], s[4:5]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s10, s[8:9], 0
-; GFX11-NEXT: s_and_b32 s17, s11, exec_lo
-; GFX11-NEXT: s_cselect_b32 s3, s3, s9
-; GFX11-NEXT: s_and_b32 s17, s12, exec_lo
-; GFX11-NEXT: s_cselect_b32 s3, s7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: s_and_b32 s7, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s3, s3, s9
-; GFX11-NEXT: s_and_b32 s7, s11, exec_lo
-; GFX11-NEXT: s_cselect_b32 s2, s2, s8
-; GFX11-NEXT: s_and_b32 s7, s12, exec_lo
-; GFX11-NEXT: s_cselect_b32 s2, s6, s2
-; GFX11-NEXT: s_and_b32 s6, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s2, s2, s8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: s_and_b32 s6, s13, exec_lo
-; GFX11-NEXT: s_cselect_b32 s6, s1, s5
-; GFX11-NEXT: s_and_b32 s7, s14, exec_lo
-; GFX11-NEXT: s_cselect_b32 s7, s6, 0x7ff80000
-; GFX11-NEXT: s_and_b32 s6, s13, exec_lo
-; GFX11-NEXT: s_cselect_b32 s6, s0, s4
-; GFX11-NEXT: s_and_b32 s8, s14, exec_lo
-; GFX11-NEXT: s_cselect_b32 s6, s6, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: s_and_b32 s9, s15, exec_lo
-; GFX11-NEXT: v_cmp_eq_f64_e64 s8, s[6:7], 0
-; GFX11-NEXT: s_cselect_b32 s1, s1, s7
-; GFX11-NEXT: s_and_b32 s9, s16, exec_lo
-; GFX11-NEXT: s_cselect_b32 s1, s5, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: s_and_b32 s5, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s1, s1, s7
-; GFX11-NEXT: s_and_b32 s5, s15, exec_lo
-; GFX11-NEXT: s_cselect_b32 s0, s0, s6
-; GFX11-NEXT: s_and_b32 s5, s16, exec_lo
-; GFX11-NEXT: s_cselect_b32 s0, s4, s0
-; GFX11-NEXT: s_and_b32 s4, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s0, s0, s6
+; GFX11-NEXT: v_max_f64 v[0:1], s[2:3], s[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[6:7]
+; GFX11-NEXT: v_max_f64 v[4:5], s[0:1], s[4:5]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v1, 0x7ff80000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v0, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, s0
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s[0:3]
+; GFX11-NEXT: ; use v[0:3]
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
@@ -1554,306 +979,110 @@ define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX7-LABEL: v_maximum_v3f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX7-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[2:3], v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v9, v3, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[8:9], 64
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[10:11]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v11, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v10, v4, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, 0, v8, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX7-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX7-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX8-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[2:3], v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v9, v3, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[8:9], 64
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[10:11]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v11, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v10, v4, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, v8, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX8-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX8-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[10:11], v[2:3], v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v9, v3, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[8:9], 64
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[10:11]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v11, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, v8, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX9-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX9-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v11, v5, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[2:3]
+; GFX940-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX940-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v10, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX940-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v11, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v17, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v10, v4, s5
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v12, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v14, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, v17, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, v18, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, v19, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[8:9], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[6:7], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[10:11], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[12:13]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[14:15]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s11, 0, v[16:17]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v8, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v10, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v9, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v11, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s11
+; GFX10-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
+; GFX10-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[4:5], v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v12, v7, v1 :: v_dual_cndmask_b32 v17, v6, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v11, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v10, v4, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v14, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v12, 0, v17, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v14, 0, v18, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v16, 0, v19, s4
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[8:9], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[10:11], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s5, 0, v[12:13]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s6, 0, v[14:15]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v12, v0 :: v_dual_cndmask_b32 v1, v13, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v8, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v10, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v7, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v9, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v11, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, v0, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, v1, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s7
+; GFX11-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
+; GFX11-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f64:
@@ -1875,247 +1104,49 @@ define <3 x double> @v_maximum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX7-LABEL: v_maximum_v3f64__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v9, v3, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v11, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v10, v4, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[6:7]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f64__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v9, v3, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v11, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v10, v4, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[6:7]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f64__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 64
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v9, v3, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v11, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[6:7]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f64__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 64
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[12:13]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 64
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[0:1]
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[0:1]
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v11, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v10, v4, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[2:3]
+; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[8:9], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[6:7], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[10:11], 64
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v11, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v10, v4, s5
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[12:13]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[14:15]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s11, 0, v[16:17]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v8, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v10, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v9, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v11, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s11
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f64__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[8:9], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[10:11], 64
-; GFX11-NEXT: v_dual_cndmask_b32 v13, v7, v1 :: v_dual_cndmask_b32 v12, v6, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v17, v11, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v10, v4, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s5, 0, v[12:13]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s6, 0, v[14:15]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v12, v0 :: v_dual_cndmask_b32 v1, v13, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v8, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v10, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v7, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v9, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v11, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, v0, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, v1, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s7
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f64__nnan:
@@ -2137,144 +1168,110 @@ define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX7-LABEL: v_maximum_v3f64__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[8:9], v[4:5], v[10:11]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v10, v4, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX7-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[10:11]
+; GFX7-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX7-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f64__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[8:9], v[4:5], v[10:11]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v10, v4, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX8-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[10:11]
+; GFX8-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX8-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f64__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[8:9], v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[10:11], v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v10, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[10:11]
+; GFX9-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX9-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f64__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v8, v2, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, 0, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v3, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v10, v4, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc
+; GFX940-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX940-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v6, v5, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX940-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v10, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v12, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v8, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s8
+; GFX10-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
+; GFX10-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f64__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[4:5], v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v12, v6, v0 :: v_dual_cndmask_b32 v1, v7, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v10, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v8, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s4
+; GFX11-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
+; GFX11-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f64__nsz:
@@ -2296,88 +1293,49 @@ define <3 x double> @v_maximum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX7-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc
+; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, v5, s5
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v6, v0 :: v_dual_cndmask_b32 v1, v7, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, v5, s1
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v3f64__nnan_nsz:
@@ -2399,404 +1357,135 @@ define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX7-LABEL: v_maximum_v4f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX7-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[2:3], v[10:11]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[10:11]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[4:5], v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v11, v3, s[6:7]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v10, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v18, v19, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[8:9]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v13, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v18, v10, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v18, v10, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[12:13]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[12:13], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[14:15], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[10:11]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX7-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX7-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX7-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX8-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[2:3], v[10:11]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[10:11]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[4:5], v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v11, v3, s[6:7]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v10, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v18, v19, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[8:9]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v13, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v18, v10, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v18, v10, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[12:13]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[12:13], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[14:15], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[10:11]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX8-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX8-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX8-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX9-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], v[4:5], v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v11, v3, s[6:7]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v10, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v18, v19, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[8:9]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v13, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v18, v10, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v18, v10, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[12:13]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[12:13], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[14:15], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[10:11]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX9-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX9-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX9-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX9-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v18, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[2:3]
+; GFX940-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX940-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v18, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[12:13], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v18, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[14:15], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX940-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX940-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s6, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s8, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_o_f64_e64 s10, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_class_f64_e64 s11, v[14:15], 64
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v19, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v21, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v20, v13, v5, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v15, v7, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, v19, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v19, 0x7ff80000, v18, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v12, v4, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, v21, s7
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v24, v14, v6, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[4:5], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[6:7], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v21, 0x7ff80000, v20, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, v23, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v23, 0x7ff80000, v22, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v22, 0, v24, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s9, v[10:11], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[12:13], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s6, 0, v[16:17]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, 0, v[18:19]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s13, 0, v[20:21]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, 0, v[22:23]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v20, v4, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v21, v5, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v8, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v14, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v9, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v13, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v15, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v0, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v1, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v20, v4, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v21, v5, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s14
+; GFX10-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
+; GFX10-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
+; GFX10-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s2, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s3, v[6:7], v[14:15]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[14:15]
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v11, v3, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v13, v5, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v22, v15, v7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v19, 0x7ff80000, v18, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v21, 0x7ff80000, v20, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v10, v2, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v12, v4, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v24, v14, v6, s3
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v23, 0x7ff80000, v22, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v18, 0, v18, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v20, 0, v20, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v22, 0, v24, s6
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[12:13], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[14:15], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[10:11], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s8, 0, v[18:19]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s9, 0, v[20:21]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s10, 0, v[22:23]
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v16, 0, v16, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v14, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v13, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v15, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s10
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, v0, s7
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v10, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v11, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, v1, s7
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s8
+; GFX11-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
+; GFX11-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
+; GFX11-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f64:
@@ -2819,320 +1508,55 @@ define <4 x double> @v_maximum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX7-LABEL: v_maximum_v4f64__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 64
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 64
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v19, v11, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v10, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v13, v5, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[4:5], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[14:15], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v15, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f64__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 64
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 64
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v19, v11, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v10, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v13, v5, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[4:5], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[14:15], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v15, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f64__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 64
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 64
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v19, v11, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v10, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v13, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[4:5], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[14:15], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v15, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f64__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 64
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v11, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 64
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[0:1]
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1]
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v13, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[12:13], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[0:1]
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v15, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[14:15], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[2:3]
+; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX940-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[10:11], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[8:9], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[12:13], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s11, v[14:15], 64
-; GFX10-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v21, v13, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v15, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v20, v12, v4, s5
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[4:5], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v14, v6, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[6:7], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[0:1], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[2:3], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[16:17]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, 0, v[18:19]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s13, 0, v[20:21]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, 0, v[22:23]
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v20, v4, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v21, v5, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v0, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v1, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v8, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v14, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v9, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v13, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v15, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v20, v4, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v21, v5, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s14
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f64__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[14:15], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[10:11], 64
-; GFX11-NEXT: v_dual_cndmask_b32 v17, v9, v1 :: v_dual_cndmask_b32 v16, v8, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v21, v13, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v12, v4, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v23, v15, v7, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v22, v14, v6, s2
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[12:13], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s8, 0, v[18:19]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s9, 0, v[20:21]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s10, 0, v[22:23]
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v14, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v15, s4
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v16, v0 :: v_dual_cndmask_b32 v1, v17, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v10, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v11, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v13, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, v0, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, v1, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s10
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f64__nnan:
@@ -3155,180 +1579,135 @@ define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX7-LABEL: v_maximum_v4f64__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[10:11], v[6:7], v[14:15]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX7-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v14, v6, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v12, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[12:13]
+; GFX7-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX7-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX7-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f64__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[10:11], v[6:7], v[14:15]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v14, v6, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v12, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[12:13]
+; GFX8-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX8-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX8-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f64__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[10:11], v[6:7], v[14:15]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v14, v6, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v12, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[12:13]
+; GFX9-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX9-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX9-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX9-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f64__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v12, v4, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v14, v6, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v15, v7, vcc
+; GFX940-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX940-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v6, 0, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX940-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX940-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s7, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_o_f64_e64 s10, v[6:7], v[14:15]
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v12, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v14, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v13, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, v7, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v16, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v8, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v10, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v7, s10
+; GFX10-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
+; GFX10-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
+; GFX10-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f64__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s2, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s3, v[6:7], v[14:15]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[14:15]
-; GFX11-NEXT: v_dual_cndmask_b32 v16, v8, v0 :: v_dual_cndmask_b32 v1, v9, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v10, v2, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v12, v4, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v14, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v13, v5, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v15, v7, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, v16, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v8, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v10, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v12, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v7, s6
+; GFX11-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
+; GFX11-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
+; GFX11-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f64__nsz:
@@ -3351,108 +1730,55 @@ define <4 x double> @v_maximum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX7-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[8:9]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[8:9]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[8:9]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v15, v7, vcc
+; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX940-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX940-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v12, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v14, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v13, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, v7, s6
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v8, v0 :: v_dual_cndmask_b32 v1, v9, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v12, v4, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v14, v6, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v13, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v15, v7, s2
+; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v4f64__nnan_nsz:
@@ -3475,782 +1801,244 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX7-LABEL: v_maximum_v8f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[16:17]
-; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[8:9], v[24:25]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[10:11], v[12:13], v[28:29]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[12:13], v[28:29]
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v17, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v32, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v16, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v33, 0, v31, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e64 v18, v21, v5, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v20, v4, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v18, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[4:5]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[8:9], v[24:25]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v23, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[22:23], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v25, v9, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v24, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[18:19]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[24:25], 64
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[26:27]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[26:27], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v27, v11, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
+; GFX7-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX7-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX7-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX7-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX7-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX7-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX7-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX7-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[14:15], v[30:31]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v29, v13, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v28, v12, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v18, v10, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v19, v11, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[12:13]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[28:29], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v31, v15, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v30, v14, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[30:31], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v18, v14, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v19, v15, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v14, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v15, v19, v15, s[12:13]
+; GFX7-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v8f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[16:17]
-; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[8:9], v[24:25]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[10:11], v[12:13], v[28:29]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[12:13], v[28:29]
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v17, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v32, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v16, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v33, 0, v31, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e64 v18, v21, v5, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v20, v4, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v18, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[4:5]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[8:9], v[24:25]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v23, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[22:23], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v25, v9, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v24, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[18:19]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[24:25], 64
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[26:27]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[26:27], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v27, v11, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
+; GFX8-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX8-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX8-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX8-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX8-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX8-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX8-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX8-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[14:15], v[30:31]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v29, v13, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v28, v12, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v18, v10, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v19, v11, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[12:13]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[28:29], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v31, v15, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v30, v14, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[30:31], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v18, v14, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v19, v15, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v14, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v15, v19, v15, s[12:13]
+; GFX8-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v8f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[16:17]
-; GFX9-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[8:9], v[24:25]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[10:11], v[12:13], v[28:29]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], v[12:13], v[28:29]
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v17, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v32, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v16, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v33, 0, v31, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v21, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v20, v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v18, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[4:5]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[8:9], v[24:25]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v23, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[22:23], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v25, v9, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v24, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[18:19]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[24:25], 64
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[26:27]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[26:27], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v27, v11, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
+; GFX9-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX9-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX9-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX9-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX9-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX9-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX9-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX9-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[14:15], v[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v29, v13, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v28, v12, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v18, v10, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v19, v11, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[12:13]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[28:29], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v31, v15, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v30, v14, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[30:31], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v18, v14, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v19, v15, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v18, v14, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v19, v15, s[12:13]
+; GFX9-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v8f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX940-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v17, v1, vcc
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v35, v32, v33, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v16, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v34, 0, v33, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[16:17], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v34, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v35, v1, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[18:19]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v34, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v35, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[18:19], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[20:21]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v16, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[20:21]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[20:21], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[22:23]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[22:23], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[8:9], v[24:25]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v16, v6, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[8:9], v[24:25]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[24:25], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[10:11], v[26:27]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[26:27], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[12:13], v[28:29]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v16, v10, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[12:13], v[28:29]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[28:29], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc
+; GFX940-NEXT: v_mov_b32_e32 v54, 0x7ff80000
+; GFX940-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX940-NEXT: v_max_f64 v[34:35], v[2:3], v[18:19]
+; GFX940-NEXT: v_max_f64 v[36:37], v[4:5], v[20:21]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX940-NEXT: v_max_f64 v[38:39], v[6:7], v[22:23]
+; GFX940-NEXT: v_max_f64 v[48:49], v[8:9], v[24:25]
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
+; GFX940-NEXT: v_max_f64 v[50:51], v[10:11], v[26:27]
+; GFX940-NEXT: v_max_f64 v[52:53], v[12:13], v[28:29]
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[14:15], v[30:31]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[30:31], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v15, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v16, v14, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v17, v15, s[2:3]
+; GFX940-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v8f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[0:1], v[16:17]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s9, v[6:7], v[22:23]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s10, v[8:9], v[24:25]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s11, v[10:11], v[26:27]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s12, v[12:13], v[28:29]
-; GFX10-NEXT: v_cmp_o_f64_e64 s13, v[6:7], v[22:23]
-; GFX10-NEXT: v_cmp_o_f64_e64 s14, v[8:9], v[24:25]
-; GFX10-NEXT: v_cmp_o_f64_e64 s15, v[10:11], v[26:27]
-; GFX10-NEXT: v_cmp_o_f64_e64 s16, v[12:13], v[28:29]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[2:3], v[18:19]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[18:19]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s7, v[4:5], v[20:21]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[20:21]
-; GFX10-NEXT: v_cmp_class_f64_e64 s17, v[26:27], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s18, v[28:29], 64
-; GFX10-NEXT: v_cndmask_b32_e32 v32, v17, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v38, v23, v7, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v48, v25, v9, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v50, v27, v11, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v33, 0x7ff80000, v32, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v52, v29, v13, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v39, 0x7ff80000, v38, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v49, 0x7ff80000, v48, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v32, 0, v32, s4
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v51, 0x7ff80000, v50, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v53, 0x7ff80000, v52, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v38, v22, v6, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v48, v24, v8, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v50, v26, v10, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v52, v28, v12, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s11, v[16:17], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[18:19], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v34, v19, v3, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v38, 0, v38, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v36, v21, v5, s7
-; GFX10-NEXT: v_cmp_class_f64_e64 s9, v[12:13], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v48, 0, v48, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v35, 0x7ff80000, v34, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v34, v18, v2, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v37, 0x7ff80000, v36, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v36, v20, v4, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v34, 0, v34, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[6:7], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v36, 0, v36, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[8:9], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[10:11], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v34, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v50, 0, v50, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v52, 0, v52, s16
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[20:21], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v16, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v18, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s15, v[22:23], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s16, v[24:25], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s19, 0, v[32:33]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s20, 0, v[34:35]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s21, 0, v[36:37]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s22, 0, v[48:49]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s23, 0, v[50:51]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s24, 0, v[52:53]
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v52, v12, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v36, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v35, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v38, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v37, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v48, v8, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v50, v10, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v39, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v49, v9, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v51, v11, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v53, v13, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v20, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v26, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v22, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v24, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v28, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v17, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v19, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v21, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v23, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v25, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v27, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v29, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v32, v0, s19
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v34, v2, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v36, v4, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v48, v8, s22
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v50, v10, s23
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v52, v12, s24
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v33, v1, s19
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v35, v3, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v37, v5, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v49, v9, s22
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v51, v11, s23
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v53, v13, s24
+; GFX10-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX10-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[18:19]
+; GFX10-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[20:21]
+; GFX10-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[22:23]
+; GFX10-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25]
+; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[8:9], v[24:25]
+; GFX10-NEXT: v_max_f64 v[24:25], v[10:11], v[26:27]
+; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[10:11], v[26:27]
+; GFX10-NEXT: v_max_f64 v[26:27], v[12:13], v[28:29]
+; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[28:29]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v16, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v18, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v20, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v8, v22, 0, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v10, v24, 0, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v12, v26, 0, s9
+; GFX10-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s9
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s10, v[14:15], v[30:31]
-; GFX10-NEXT: v_cmp_o_f64_e64 s13, v[14:15], v[30:31]
-; GFX10-NEXT: v_cmp_class_f64_e64 s25, v[30:31], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v31, v15, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v30, v14, s10
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[38:39]
-; GFX10-NEXT: v_cndmask_b32_e64 v55, 0x7ff80000, v16, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v54, 0, v18, s13
-; GFX10-NEXT: v_cmp_class_f64_e64 s13, v[14:15], 64
-; GFX10-NEXT: v_cmp_eq_f64_e32 vcc_lo, 0, v[54:55]
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v38, v6, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v39, v7, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v54, v14, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v55, v15, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v30, s25
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v31, s25
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v55, v15, vcc_lo
+; GFX10-NEXT: v_max_f64 v[28:29], v[14:15], v[30:31]
+; GFX10-NEXT: v_cmp_u_f64_e64 s10, v[14:15], v[30:31]
+; GFX10-NEXT: v_cndmask_b32_e64 v14, v28, 0, s10
+; GFX10-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s10
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v8f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: v_cmp_gt_f64_e64 s4, v[6:7], v[22:23]
-; GFX11-NEXT: v_cmp_o_f64_e64 s9, v[6:7], v[22:23]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s1, v[2:3], v[18:19]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s6, v[10:11], v[26:27]
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[2:3], v[18:19]
-; GFX11-NEXT: v_cmp_o_f64_e64 s11, v[10:11], v[26:27]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[0:1], v[16:17]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s3, v[4:5], v[20:21]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s5, v[8:9], v[24:25]
-; GFX11-NEXT: v_cmp_gt_f64_e64 s7, v[12:13], v[28:29]
-; GFX11-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[20:21]
-; GFX11-NEXT: v_cmp_o_f64_e64 s10, v[8:9], v[24:25]
-; GFX11-NEXT: v_cmp_o_f64_e64 s12, v[12:13], v[28:29]
-; GFX11-NEXT: v_cmp_class_f64_e64 s13, v[18:19], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s15, v[20:21], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v38, v23, v7, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v34, v19, v3, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v50, v27, v11, s6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v39, 0x7ff80000, v38, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v38, v22, v6, s4
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[6:7], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v35, 0x7ff80000, v34, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v51, 0x7ff80000, v50, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v34, v18, v2, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v50, v26, v10, s6
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[0:1], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v36, v21, v5, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v48, v25, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v52, v29, v13, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v50, 0, v50, s11
-; GFX11-NEXT: v_cmp_class_f64_e64 s11, v[16:17], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v37, 0x7ff80000, v36, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v49, 0x7ff80000, v48, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v53, 0x7ff80000, v52, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v36, v20, v4, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v48, v24, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v52, v28, v12, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v34, 0, v34, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v38, 0, v38, s9
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[2:3], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[4:5], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s7, v[10:11], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s9, v[12:13], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v36, 0, v36, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v48, 0, v48, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v52, 0, v52, s12
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[24:25], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s8, v[26:27], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s10, v[28:29], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s14, 0, v[34:35]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s16, 0, v[36:37]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s17, 0, v[38:39]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s18, 0, v[48:49]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s20, 0, v[50:51]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s21, 0, v[52:53]
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v39, v7, s4
-; GFX11-NEXT: v_cndmask_b32_e32 v32, v17, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v38, v6, s4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v33, 0x7ff80000, v32, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, v1, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v32, 0, v32, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v17, s11
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, v0, s1
-; GFX11-NEXT: v_cmp_eq_f64_e64 s12, 0, v[32:33]
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v34, v2, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v36, v4, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v48, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v16, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v50, v10, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v52, v12, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v35, v3, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v37, v5, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v49, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v51, v11, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v53, v13, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v18, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v20, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v24, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v26, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, v28, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v19, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v21, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v25, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v27, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v29, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v34, v2, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v36, v4, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v48, v8, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v50, v10, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v52, v12, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v35, v3, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v37, v5, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v49, v9, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v51, v11, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v53, v13, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, v0, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, v1, s12
+; GFX11-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX11-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[18:19]
+; GFX11-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[20:21]
+; GFX11-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[22:23]
+; GFX11-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25]
+; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[24:25]
+; GFX11-NEXT: v_max_f64 v[24:25], v[10:11], v[26:27]
+; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[26:27]
+; GFX11-NEXT: v_max_f64 v[26:27], v[12:13], v[28:29]
+; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[28:29]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v16, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v18, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v20, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v8, v22, 0, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v10, v24, 0, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v12, v26, 0, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s5
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[14:15], v[30:31]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[14:15], v[30:31]
-; GFX11-NEXT: v_cmp_class_f64_e64 s19, v[30:31], 64
-; GFX11-NEXT: v_cndmask_b32_e32 v54, v31, v15, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[22:23], 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v55, 0x7ff80000, v54, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v54, 0, v16, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[14:15], 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s22, 0, v[54:55]
-; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v23 :: v_dual_cndmask_b32 v6, v6, v22
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v54, v14, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v55, v15, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v38, v6, s17
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v39, v7, s17
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v30, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, v31, s19
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v54, v14, s22
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v55, v15, s22
+; GFX11-NEXT: v_max_f64 v[28:29], v[14:15], v[30:31]
+; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[30:31]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e64 v14, v28, 0, s6
+; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v8f64:
@@ -4279,1799 +2067,798 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-LABEL: v_maximum_v16f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16
-; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
-; GFX7-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:24
-; GFX7-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:32
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:28
-; GFX7-NEXT: v_mov_b32_e32 v39, 0x7ff80000
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[31:32]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[31:32]
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v48, v32, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v48, v31, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v32, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v34, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v33, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v32, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[6:7]
-; GFX7-NEXT: s_waitcnt vmcnt(2)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v33, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v34, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[35:36], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v36, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v33, v35, v4, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v33, 0, v33, s[6:7]
+; GFX7-NEXT: v_writelane_b32 v34, s30, 0
+; GFX7-NEXT: v_writelane_b32 v34, s31, 1
+; GFX7-NEXT: v_writelane_b32 v34, s34, 2
+; GFX7-NEXT: v_writelane_b32 v34, s35, 3
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v35, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v36, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v38, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v35, v37, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v37, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v38, s[4:5]
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX7-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:48
-; GFX7-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:44
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
-; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 64
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[8:9], v[37:38]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[8:9], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, v38, v9, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v37, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v35, v8, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v36, v9, vcc
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[48:49]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, v37, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v38, s[4:5]
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:68
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:72
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v35, v8, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v49, v11, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v35, v48, v10, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, 0, v35, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v36, v9, s[6:7]
-; GFX7-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80
-; GFX7-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:76
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[12:13], v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, v48, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v49, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[12:13], v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v32, v13, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v31, v12, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[14:15], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX7-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX7-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX7-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX7-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX7-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[14:15], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v50, v34, v15, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v33, v14, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, v33, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v34, s[4:5]
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:96
-; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[16:17], v[37:38]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[16:17], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, v38, v17, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v37, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[18:19], v[35:36]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[18:19], v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v16, v37, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v17, v38, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v36, v19, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v37, v35, v18, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[18:19], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, 0, v37, s[6:7]
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:100
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[35:36], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v18, v50, v18, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v51, v19, s[4:5]
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[20:21], v[31:32]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[20:21], v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v18, v35, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v19, v19, v36, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v32, v21, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v31, v20, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v50, v18, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v19, v51, v19, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[31:32], 64
-; GFX7-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:112
-; GFX7-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:108
-; GFX7-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:120
-; GFX7-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:116
-; GFX7-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[35:36]
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[22:23], v[33:34]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[22:23], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v20, v20, v31, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v21, v21, v32, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v34, v23, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v31, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v33, v22, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v31, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e64 v22, v22, v33, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX7-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX7-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX7-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX7-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX7-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX7-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX7-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX7-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v34, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 64
-; GFX7-NEXT: s_waitcnt vmcnt(7)
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[4:5], v[24:25], v[37:38]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[24:25], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v38, v25, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v39, v34, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v37, v24, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v24, v34, v24, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v25, v35, v25, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[37:38], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[34:35]
-; GFX7-NEXT: v_cndmask_b32_e32 v24, v24, v37, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v25, v25, v38, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v24, v34, v24, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v25, v35, v25, s[4:5]
-; GFX7-NEXT: s_waitcnt vmcnt(5)
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[6:7], v[26:27], v[48:49]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[26:27], v[48:49]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 64
-; GFX7-NEXT: s_waitcnt vmcnt(3)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[28:29], v[50:51]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v49, v27, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v48, v26, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[8:9]
-; GFX7-NEXT: v_cmp_gt_f64_e64 s[8:9], v[28:29], v[50:51]
-; GFX7-NEXT: v_cndmask_b32_e32 v26, v34, v26, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[34:35]
-; GFX7-NEXT: v_cndmask_b32_e32 v27, v35, v27, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, v48, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v49, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v51, v29, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v27, v35, v27, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX7-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
+; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX7-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX7-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX7-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX7-NEXT: v_readlane_b32 s35, v34, 3
+; GFX7-NEXT: v_readlane_b32 s34, v34, 2
+; GFX7-NEXT: v_readlane_b32 s31, v34, 1
+; GFX7-NEXT: v_readlane_b32 s30, v34, 0
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_gt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[30:31], v[32:33]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v26, v34, v26, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v50, v28, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[28:29], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[10:11]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[50:51], 64
-; GFX7-NEXT: v_cndmask_b32_e32 v36, v33, v31, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v37, v39, v36, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v36, v32, v30, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 64
-; GFX7-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[32:33], 64
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[34:35]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[36:37]
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, v50, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v30, v36, v30, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v51, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v30, v36, v30, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v37, v31, s[12:13]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v16f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16
-; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
-; GFX8-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:24
-; GFX8-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:32
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:28
-; GFX8-NEXT: v_mov_b32_e32 v39, 0x7ff80000
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[31:32]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[31:32]
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v48, v32, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v48, v31, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v32, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v34, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v33, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v32, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[6:7]
-; GFX8-NEXT: s_waitcnt vmcnt(2)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v33, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v34, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[35:36], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v36, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v33, v35, v4, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v33, 0, v33, s[6:7]
+; GFX8-NEXT: v_writelane_b32 v34, s30, 0
+; GFX8-NEXT: v_writelane_b32 v34, s31, 1
+; GFX8-NEXT: v_writelane_b32 v34, s34, 2
+; GFX8-NEXT: v_writelane_b32 v34, s35, 3
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v35, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v36, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v38, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v35, v37, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v37, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v38, s[4:5]
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX8-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:48
-; GFX8-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:44
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
-; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 64
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[8:9], v[37:38]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[8:9], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, v38, v9, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v37, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v35, v8, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v36, v9, vcc
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[48:49]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, v37, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v38, s[4:5]
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:68
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:72
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v35, v8, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v49, v11, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v35, v48, v10, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, 0, v35, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v36, v9, s[6:7]
-; GFX8-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80
-; GFX8-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:76
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[12:13], v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, v48, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v49, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[12:13], v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v32, v13, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v31, v12, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[14:15], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX8-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX8-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX8-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX8-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[14:15], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v50, v34, v15, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v33, v14, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v33, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v34, s[4:5]
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:96
-; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[16:17], v[37:38]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[16:17], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, v38, v17, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v37, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[18:19], v[35:36]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[18:19], v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v37, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v17, v38, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v36, v19, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v37, v35, v18, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[18:19], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, 0, v37, s[6:7]
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:100
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[35:36], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v18, v50, v18, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v51, v19, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[20:21], v[31:32]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[20:21], v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v18, v35, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v19, v19, v36, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v32, v21, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v31, v20, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v50, v18, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v19, v51, v19, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[31:32], 64
-; GFX8-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:112
-; GFX8-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:108
-; GFX8-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:120
-; GFX8-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:116
-; GFX8-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[35:36]
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[22:23], v[33:34]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[22:23], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v20, v20, v31, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v21, v21, v32, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v34, v23, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v31, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v33, v22, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v31, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, v33, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX8-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX8-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX8-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX8-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX8-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v34, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 64
-; GFX8-NEXT: s_waitcnt vmcnt(7)
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[4:5], v[24:25], v[37:38]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[24:25], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v38, v25, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v39, v34, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v37, v24, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v24, v34, v24, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v25, v35, v25, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[37:38], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[34:35]
-; GFX8-NEXT: v_cndmask_b32_e32 v24, v24, v37, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v25, v25, v38, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v24, v34, v24, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v25, v35, v25, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(5)
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[6:7], v[26:27], v[48:49]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[26:27], v[48:49]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 64
-; GFX8-NEXT: s_waitcnt vmcnt(3)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[28:29], v[50:51]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v49, v27, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v48, v26, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[8:9]
-; GFX8-NEXT: v_cmp_gt_f64_e64 s[8:9], v[28:29], v[50:51]
-; GFX8-NEXT: v_cndmask_b32_e32 v26, v34, v26, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[34:35]
-; GFX8-NEXT: v_cndmask_b32_e32 v27, v35, v27, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, v48, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v49, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v51, v29, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v27, v35, v27, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX8-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
+; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX8-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX8-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX8-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX8-NEXT: v_readlane_b32 s35, v34, 3
+; GFX8-NEXT: v_readlane_b32 s34, v34, 2
+; GFX8-NEXT: v_readlane_b32 s31, v34, 1
+; GFX8-NEXT: v_readlane_b32 s30, v34, 0
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_gt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[30:31], v[32:33]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v26, v34, v26, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v50, v28, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[28:29], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[10:11]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[50:51], 64
-; GFX8-NEXT: v_cndmask_b32_e32 v36, v33, v31, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v37, v39, v36, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v36, v32, v30, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 64
-; GFX8-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[32:33], 64
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[34:35]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[36:37]
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, v50, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v30, v36, v30, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v51, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v36, v30, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v37, v31, s[12:13]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_maximum_v16f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
-; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:24
-; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:32
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:28
-; GFX9-NEXT: v_mov_b32_e32 v39, 0x7ff80000
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[31:32]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[31:32]
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v48, v32, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v48, v31, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v32, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v34, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v33, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v32, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(2)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v33, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v34, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[35:36], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v36, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v33, v35, v4, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v33, 0, v33, s[6:7]
+; GFX9-NEXT: v_writelane_b32 v34, s30, 0
+; GFX9-NEXT: v_writelane_b32 v34, s31, 1
+; GFX9-NEXT: v_writelane_b32 v34, s34, 2
+; GFX9-NEXT: v_writelane_b32 v34, s35, 3
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v35, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v36, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v38, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v35, v37, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v37, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v38, s[4:5]
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX9-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:48
-; GFX9-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:44
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32]
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX9-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX9-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32]
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 64
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[8:9], v[37:38]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[8:9], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, v38, v9, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v37, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v35, v8, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v36, v9, vcc
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[48:49]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v37, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v38, s[4:5]
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:68
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:72
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v35, v8, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v49, v11, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v35, v48, v10, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, 0, v35, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v36, v9, s[6:7]
-; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80
-; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:76
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[12:13], v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v48, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v49, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[12:13], v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v32, v13, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v31, v12, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[14:15], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX9-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX9-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32]
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX9-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX9-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32]
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[14:15], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v50, v34, v15, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v33, v14, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v33, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v34, s[4:5]
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:96
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[16:17], v[37:38]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[16:17], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, v38, v17, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v37, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[18:19], v[35:36]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[18:19], v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v37, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v38, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v36, v19, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v37, v35, v18, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[18:19], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, 0, v37, s[6:7]
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:100
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[35:36], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v50, v18, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v51, v19, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[20:21], v[31:32]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[20:21], v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v18, v35, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v19, v19, v36, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v32, v21, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v31, v20, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v50, v18, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v19, v51, v19, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[31:32], 64
-; GFX9-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:112
-; GFX9-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:108
-; GFX9-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:120
-; GFX9-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:116
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[35:36]
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[22:23], v[33:34]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[22:23], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v20, v20, v31, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v21, v21, v32, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v34, v23, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v31, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v33, v22, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v31, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, v33, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX9-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX9-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32]
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX9-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX9-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX9-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX9-NEXT: v_cndmask_b32_e64 v23, v23, v34, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 64
-; GFX9-NEXT: s_waitcnt vmcnt(7)
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[4:5], v[24:25], v[37:38]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[24:25], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v38, v25, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v39, v34, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v37, v24, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v24, v34, v24, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v25, v35, v25, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[37:38], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[34:35]
-; GFX9-NEXT: v_cndmask_b32_e32 v24, v24, v37, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v25, v25, v38, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v24, v34, v24, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v25, v35, v25, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(5)
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[6:7], v[26:27], v[48:49]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[26:27], v[48:49]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 64
-; GFX9-NEXT: s_waitcnt vmcnt(3)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[10:11], v[28:29], v[50:51]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v49, v27, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v48, v26, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[8:9]
-; GFX9-NEXT: v_cmp_gt_f64_e64 s[8:9], v[28:29], v[50:51]
-; GFX9-NEXT: v_cndmask_b32_e32 v26, v34, v26, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[34:35]
-; GFX9-NEXT: v_cndmask_b32_e32 v27, v35, v27, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, v48, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v27, v49, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v51, v29, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v35, v27, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX9-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
+; GFX9-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX9-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX9-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX9-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX9-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX9-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX9-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX9-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX9-NEXT: v_readlane_b32 s35, v34, 3
+; GFX9-NEXT: v_readlane_b32 s34, v34, 2
+; GFX9-NEXT: v_readlane_b32 s31, v34, 1
+; GFX9-NEXT: v_readlane_b32 s30, v34, 0
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_gt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[30:31], v[32:33]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v34, v26, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v50, v28, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[28:29], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[10:11]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[50:51], 64
-; GFX9-NEXT: v_cndmask_b32_e32 v36, v33, v31, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v37, v39, v36, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v36, v32, v30, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 64
-; GFX9-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[32:33], 64
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[34:35]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[36:37]
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, v50, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v30, v36, v30, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v29, v51, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v36, v30, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v37, v31, s[12:13]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_maximum_v16f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
-; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:8
-; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:4
-; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:16
-; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:12
-; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:24
-; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:20
-; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:32
-; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:28
+; GFX940-NEXT: v_accvgpr_write_b32 a1, v40 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a2, v41 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a3, v42 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a4, v43 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a5, v44 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a6, v45 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a7, v46 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a8, v47 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a9, v56 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a10, v57 ; Reload Reuse
+; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:16
+; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:12
+; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:24
+; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:20
+; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:32
+; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:28
+; GFX940-NEXT: scratch_load_dword v57, off, s32 offset:8
+; GFX940-NEXT: scratch_load_dword v56, off, s32 offset:4
+; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:40
+; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:36
+; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:48
+; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:44
+; GFX940-NEXT: scratch_load_dword v43, off, s32 offset:56
+; GFX940-NEXT: scratch_load_dword v42, off, s32 offset:52
+; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:64
+; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:60
+; GFX940-NEXT: scratch_load_dword v55, off, s32 offset:72
+; GFX940-NEXT: scratch_load_dword v54, off, s32 offset:68
+; GFX940-NEXT: scratch_load_dword v53, off, s32 offset:80
+; GFX940-NEXT: scratch_load_dword v52, off, s32 offset:76
+; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:88
+; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:84
+; GFX940-NEXT: scratch_load_dword v35, off, s32 offset:96
+; GFX940-NEXT: scratch_load_dword v34, off, s32 offset:92
; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: scratch_load_dword v33, off, s32 offset:128
-; GFX940-NEXT: scratch_load_dword v32, off, s32 offset:124
-; GFX940-NEXT: scratch_load_dword v35, off, s32 offset:120
-; GFX940-NEXT: scratch_load_dword v34, off, s32 offset:116
-; GFX940-NEXT: scratch_load_dword v43, off, s32 offset:40
-; GFX940-NEXT: scratch_load_dword v42, off, s32 offset:36
+; GFX940-NEXT: scratch_load_dword v33, off, s32 offset:104
+; GFX940-NEXT: scratch_load_dword v32, off, s32 offset:100
+; GFX940-NEXT: v_accvgpr_write_b32 a11, v58 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a12, v59 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a13, v60 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a14, v61 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a15, v62 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a16, v63 ; Reload Reuse
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_max_f64 v[58:59], v[2:3], v[36:37]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[36:37]
; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:112
; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:108
-; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:104
-; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:100
-; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:96
-; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:92
-; GFX940-NEXT: scratch_load_dword v53, off, s32 offset:56
-; GFX940-NEXT: scratch_load_dword v52, off, s32 offset:52
-; GFX940-NEXT: scratch_load_dword v55, off, s32 offset:48
-; GFX940-NEXT: scratch_load_dword v54, off, s32 offset:44
-; GFX940-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse
-; GFX940-NEXT: v_mov_b32_e32 v56, 0x7ff80000
-; GFX940-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_max_f64 v[60:61], v[4:5], v[38:39]
+; GFX940-NEXT: v_cmp_u_f64_e64 s[0:1], v[4:5], v[38:39]
+; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:120
+; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:116
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_max_f64 v[62:63], v[6:7], v[48:49]
+; GFX940-NEXT: v_cmp_u_f64_e64 s[2:3], v[6:7], v[48:49]
+; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:128
+; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:124
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_max_f64 v[2:3], v[0:1], v[56:57]
+; GFX940-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[56:57]
+; GFX940-NEXT: v_mov_b32_e32 v0, 0x7ff80000
; GFX940-NEXT: s_waitcnt vmcnt(23)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[40:41]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v41, v1, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[40:41]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v57, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v40, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v57, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[40:41], 64
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v58, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v59, v1, vcc
+; GFX940-NEXT: v_max_f64 v[56:57], v[8:9], v[46:47]
+; GFX940-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5]
+; GFX940-NEXT: v_accvgpr_write_b32 a0, v1
+; GFX940-NEXT: v_cndmask_b32_e64 v1, v3, v0, s[4:5]
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v58, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v59, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
; GFX940-NEXT: s_waitcnt vmcnt(21)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[2:3], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v40, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v41, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v51, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e32 v40, v50, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 64
-; GFX940-NEXT: v_cndmask_b32_e64 v61, v56, v57, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v60, 0, v40, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v60, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v61, v3, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[50:51], 64
-; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:64
-; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:60
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v50, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v51, vcc
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[58:59]
-; GFX940-NEXT: s_waitcnt vmcnt(21)
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[44:45]
-; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:88
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v58, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v59, v1, vcc
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[60:61]
+; GFX940-NEXT: v_max_f64 v[46:47], v[10:11], v[44:45]
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v60, 0, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v9, v57, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
+; GFX940-NEXT: s_waitcnt vmcnt(19)
+; GFX940-NEXT: v_max_f64 v[44:45], v[12:13], v[42:43]
+; GFX940-NEXT: v_cndmask_b32_e64 v5, v61, v0, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v10, v46, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v11, v47, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
+; GFX940-NEXT: s_waitcnt vmcnt(17)
+; GFX940-NEXT: v_max_f64 v[42:43], v[14:15], v[40:41]
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v62, 0, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v12, v44, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v13, v45, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
+; GFX940-NEXT: s_waitcnt vmcnt(15)
+; GFX940-NEXT: v_max_f64 v[40:41], v[16:17], v[54:55]
+; GFX940-NEXT: v_cndmask_b32_e64 v7, v63, v0, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v14, v42, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v15, v43, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
+; GFX940-NEXT: s_waitcnt vmcnt(13)
+; GFX940-NEXT: v_max_f64 v[54:55], v[18:19], v[52:53]
+; GFX940-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v16, v40, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v17, v41, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
+; GFX940-NEXT: s_waitcnt vmcnt(11)
+; GFX940-NEXT: v_max_f64 v[52:53], v[20:21], v[50:51]
+; GFX940-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v18, v54, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v19, v55, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
+; GFX940-NEXT: s_waitcnt vmcnt(9)
+; GFX940-NEXT: v_max_f64 v[50:51], v[22:23], v[34:35]
+; GFX940-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v20, v52, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v21, v53, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[34:35]
+; GFX940-NEXT: s_waitcnt vmcnt(6)
+; GFX940-NEXT: v_max_f64 v[34:35], v[24:25], v[32:33]
+; GFX940-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v22, v50, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v23, v51, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
+; GFX940-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v24, v34, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v25, v35, v0, vcc
+; GFX940-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse
+; GFX940-NEXT: s_waitcnt vmcnt(4)
+; GFX940-NEXT: v_max_f64 v[32:33], v[26:27], v[36:37]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[36:37]
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v60, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v61, v3, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[4:5], v[44:45]
-; GFX940-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v45, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v44, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v50, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[44:45], 64
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v58, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v59, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v44, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v45, s[0:1]
-; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:72
-; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:68
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[58:59]
-; GFX940-NEXT: s_waitcnt vmcnt(22)
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v58, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v59, v5, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[6:7], v[46:47]
+; GFX940-NEXT: v_cndmask_b32_e64 v26, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v27, v33, v0, vcc
+; GFX940-NEXT: s_waitcnt vmcnt(2)
+; GFX940-NEXT: v_max_f64 v[32:33], v[28:29], v[38:39]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[38:39]
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v47, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v46, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v50, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[46:47], 64
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v58, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v59, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v46, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v47, s[0:1]
-; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:80
-; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:76
-; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:84
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[58:59]
-; GFX940-NEXT: s_waitcnt vmcnt(18)
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[8:9], v[42:43]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v58, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v59, v7, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[8:9], v[42:43]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v43, v9, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v57, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v42, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v57, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[42:43], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[58:59]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v58, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v59, v9, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(8)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[10:11], v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v8, v42, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v9, v43, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v42, v55, v11, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[10:11], v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v58, v8, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v59, v9, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v43, v56, v42, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v42, v54, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v42, 0, v42, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[54:55], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[42:43]
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v42, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v43, v11, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[12:13], v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v10, v54, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v11, v55, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v54, v53, v13, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[12:13], v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v42, v10, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v43, v11, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v55, v56, v54, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v54, v52, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v54, 0, v54, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[52:53], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v54, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v55, v13, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(6)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[14:15], v[40:41]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v12, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v13, v53, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v41, v15, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[14:15], v[40:41]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v54, v12, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v55, v13, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v40, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[40:41], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v52, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v14, v40, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v53, v15, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(3)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[16:17], v[44:45]
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v52, v14, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v15, v41, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v45, v17, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[16:17], v[44:45]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v53, v15, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v44, v16, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[44:45], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v52, v16, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, v16, v44, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v17, v53, v17, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(1)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[18:19], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e64 v16, v52, v16, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v17, v45, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v47, v19, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[18:19], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v53, v17, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v46, v18, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[46:47], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v18, v52, v18, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v18, v18, v46, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v19, v53, v19, vcc
+; GFX940-NEXT: v_cndmask_b32_e64 v28, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v29, v33, v0, vcc
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[20:21], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e64 v18, v52, v18, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v19, v19, v47, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v51, v21, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[20:21], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e64 v19, v53, v19, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v50, v20, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[20:21], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[50:51], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v20, v52, v20, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v21, v53, v21, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[22:23], v[48:49]
-; GFX940-NEXT: v_cndmask_b32_e64 v20, v20, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v21, v21, v51, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v49, v23, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[22:23], v[48:49]
-; GFX940-NEXT: v_cndmask_b32_e64 v20, v52, v20, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v21, v53, v21, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v51, v56, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v48, v22, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v50, 0, v50, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[48:49], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e32 v22, v50, v22, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v23, v51, v23, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[24:25], v[38:39]
-; GFX940-NEXT: v_cndmask_b32_e64 v22, v22, v48, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v23, v23, v49, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v48, v39, v25, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[24:25], v[38:39]
-; GFX940-NEXT: v_cndmask_b32_e64 v22, v50, v22, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v23, v51, v23, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v49, v56, v48, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v48, v38, v24, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[38:39], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[48:49]
-; GFX940-NEXT: v_cndmask_b32_e32 v24, v48, v24, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v25, v49, v25, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[26:27], v[36:37]
-; GFX940-NEXT: v_cndmask_b32_e64 v24, v24, v38, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v25, v25, v39, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v38, v37, v27, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[26:27], v[36:37]
-; GFX940-NEXT: v_cndmask_b32_e64 v24, v48, v24, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v25, v49, v25, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v39, v56, v38, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v38, v36, v26, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v38, 0, v38, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[36:37], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[38:39]
-; GFX940-NEXT: v_cndmask_b32_e32 v26, v38, v26, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v27, v39, v27, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[28:29], v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e64 v26, v26, v36, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v27, v27, v37, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v36, v35, v29, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[28:29], v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e64 v26, v38, v26, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v27, v39, v27, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v37, v56, v36, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v36, v34, v28, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[28:29], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[34:35], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[36:37]
-; GFX940-NEXT: v_cndmask_b32_e32 v28, v36, v28, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v29, v37, v29, vcc
-; GFX940-NEXT: v_cmp_gt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX940-NEXT: v_cndmask_b32_e64 v28, v28, v34, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v29, v29, v35, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v34, v33, v31, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[30:31], v[32:33]
-; GFX940-NEXT: v_cndmask_b32_e64 v28, v36, v28, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v29, v37, v29, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v35, v56, v34, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v34, v32, v30, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 64
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[32:33], 64
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e32 v30, v34, v30, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v31, v35, v31, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v30, v34, v30, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v31, v35, v31, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse
+; GFX940-NEXT: v_max_f64 v[32:33], v[30:31], v[48:49]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[48:49]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v30, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v31, v33, v0, vcc
+; GFX940-NEXT: v_accvgpr_read_b32 v0, a0
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v16f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_clause 0x20
+; GFX10-NEXT: s_clause 0x19
+; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX10-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:24
+; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:20
+; GFX10-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:32
+; GFX10-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:28
+; GFX10-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
+; GFX10-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:68
+; GFX10-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:64
+; GFX10-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:60
+; GFX10-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:56
+; GFX10-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:52
+; GFX10-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:48
+; GFX10-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:44
+; GFX10-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
; GFX10-NEXT: buffer_load_dword v65, off, s[0:3], s32 offset:8
; GFX10-NEXT: buffer_load_dword v64, off, s[0:3], s32 offset:4
-; GFX10-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:16
-; GFX10-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:12
-; GFX10-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:24
-; GFX10-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:20
-; GFX10-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:32
-; GFX10-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:28
-; GFX10-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:36
-; GFX10-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:40
-; GFX10-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:56
-; GFX10-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:52
-; GFX10-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48
-; GFX10-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44
-; GFX10-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
-; GFX10-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
-; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:68
-; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:72
-; GFX10-NEXT: buffer_load_dword v83, off, s[0:3], s32 offset:80
-; GFX10-NEXT: buffer_load_dword v82, off, s[0:3], s32 offset:76
-; GFX10-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:88
-; GFX10-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:84
+; GFX10-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:100
; GFX10-NEXT: buffer_load_dword v69, off, s[0:3], s32 offset:96
; GFX10-NEXT: buffer_load_dword v68, off, s[0:3], s32 offset:92
-; GFX10-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:100
-; GFX10-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:104
-; GFX10-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:112
-; GFX10-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:108
-; GFX10-NEXT: buffer_load_dword v85, off, s[0:3], s32 offset:120
-; GFX10-NEXT: buffer_load_dword v84, off, s[0:3], s32 offset:116
+; GFX10-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:88
+; GFX10-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:84
+; GFX10-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:80
+; GFX10-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:76
+; GFX10-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:72
+; GFX10-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:104
+; GFX10-NEXT: s_waitcnt vmcnt(24)
+; GFX10-NEXT: v_max_f64 v[82:83], v[2:3], v[31:32]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[2:3], v[31:32]
+; GFX10-NEXT: s_waitcnt vmcnt(22)
+; GFX10-NEXT: v_max_f64 v[84:85], v[4:5], v[33:34]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[4:5], v[33:34]
+; GFX10-NEXT: s_clause 0x3
+; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:120
+; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:116
+; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:112
+; GFX10-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:108
+; GFX10-NEXT: s_waitcnt vmcnt(24)
+; GFX10-NEXT: v_max_f64 v[32:33], v[6:7], v[35:36]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[6:7], v[35:36]
+; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: buffer_load_dword v87, off, s[0:3], s32 offset:128
-; GFX10-NEXT: buffer_load_dword v86, off, s[0:3], s32 offset:124
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[0:1], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[2:3], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s17, v[10:11], 64
-; GFX10-NEXT: s_waitcnt vmcnt(31)
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[64:65]
-; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[0:1], v[64:65]
-; GFX10-NEXT: s_waitcnt vmcnt(29)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s5, v[2:3], v[54:55]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[54:55]
-; GFX10-NEXT: s_waitcnt vmcnt(27)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s7, v[4:5], v[52:53]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[52:53]
-; GFX10-NEXT: s_waitcnt vmcnt(25)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s9, v[6:7], v[50:51]
-; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[6:7], v[50:51]
+; GFX10-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:128
+; GFX10-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:124
; GFX10-NEXT: s_waitcnt vmcnt(23)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s13, v[8:9], v[48:49]
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[64:65], 64
+; GFX10-NEXT: v_cmp_u_f64_e64 s10, v[14:15], v[50:51]
; GFX10-NEXT: s_waitcnt vmcnt(21)
-; GFX10-NEXT: v_cmp_gt_f64_e64 s15, v[12:13], v[36:37]
-; GFX10-NEXT: s_waitcnt vmcnt(17)
-; GFX10-NEXT: v_cmp_o_f64_e64 s16, v[14:15], v[34:35]
-; GFX10-NEXT: v_cndmask_b32_e32 v96, v64, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v97, v54, v2, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v99, v55, v3, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v100, v52, v4, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v96, 0, v96, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v101, v50, v6, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v98, 0, v97, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v97, v65, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[54:55], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v96, v0, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v99, 0x7ff80000, v99, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v98, v2, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v97, 0x7ff80000, v97, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v100, 0, v100, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v102, 0, v101, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v99, v3, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[6:7], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v97, v1, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[4:5], 64
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[10:11], v[38:39]
-; GFX10-NEXT: v_cndmask_b32_e64 v112, v48, v8, s13
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[12:13], v[36:37]
-; GFX10-NEXT: v_cmp_gt_f64_e64 s6, v[14:15], v[34:35]
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v64, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v65, s14
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[52:53], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v113, v36, v12, s15
+; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[52:53]
+; GFX10-NEXT: s_waitcnt vmcnt(19)
+; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[10:11], v[54:55]
+; GFX10-NEXT: s_waitcnt vmcnt(18)
+; GFX10-NEXT: v_max_f64 v[34:35], v[8:9], v[37:38]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[8:9], v[37:38]
+; GFX10-NEXT: s_waitcnt vmcnt(16)
+; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[64:65]
+; GFX10-NEXT: v_max_f64 v[36:37], v[10:11], v[54:55]
+; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[0:1], v[64:65]
+; GFX10-NEXT: v_max_f64 v[38:39], v[12:13], v[52:53]
+; GFX10-NEXT: v_max_f64 v[52:53], v[14:15], v[50:51]
+; GFX10-NEXT: s_waitcnt vmcnt(11)
+; GFX10-NEXT: v_max_f64 v[54:55], v[20:21], v[70:71]
+; GFX10-NEXT: v_cmp_u_f64_e64 s13, v[20:21], v[70:71]
+; GFX10-NEXT: s_waitcnt vmcnt(9)
+; GFX10-NEXT: v_cmp_u_f64_e64 s12, v[18:19], v[80:81]
+; GFX10-NEXT: s_waitcnt vmcnt(8)
+; GFX10-NEXT: v_max_f64 v[50:51], v[16:17], v[48:49]
+; GFX10-NEXT: v_cmp_u_f64_e64 s11, v[16:17], v[48:49]
+; GFX10-NEXT: v_max_f64 v[48:49], v[18:19], v[80:81]
+; GFX10-NEXT: v_max_f64 v[64:65], v[22:23], v[68:69]
+; GFX10-NEXT: v_cmp_u_f64_e64 s14, v[22:23], v[68:69]
+; GFX10-NEXT: s_waitcnt vmcnt(7)
+; GFX10-NEXT: v_max_f64 v[68:69], v[24:25], v[66:67]
+; GFX10-NEXT: v_cmp_u_f64_e64 s15, v[24:25], v[66:67]
+; GFX10-NEXT: v_cndmask_b32_e64 v10, v36, 0, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v8, v34, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v9, v35, 0x7ff80000, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v11, v37, 0x7ff80000, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v12, v38, 0, s9
+; GFX10-NEXT: v_cndmask_b32_e64 v13, v39, 0x7ff80000, s9
+; GFX10-NEXT: v_cndmask_b32_e64 v14, v52, 0, s10
+; GFX10-NEXT: v_cndmask_b32_e64 v15, v53, 0x7ff80000, s10
+; GFX10-NEXT: v_cndmask_b32_e64 v16, v50, 0, s11
+; GFX10-NEXT: v_cndmask_b32_e64 v17, v51, 0x7ff80000, s11
+; GFX10-NEXT: v_cndmask_b32_e64 v18, v48, 0, s12
+; GFX10-NEXT: v_cndmask_b32_e64 v19, v49, 0x7ff80000, s12
+; GFX10-NEXT: v_cndmask_b32_e64 v20, v54, 0, s13
+; GFX10-NEXT: v_cndmask_b32_e64 v21, v55, 0x7ff80000, s13
+; GFX10-NEXT: v_cndmask_b32_e64 v22, v64, 0, s14
+; GFX10-NEXT: v_cndmask_b32_e64 v23, v65, 0x7ff80000, s14
+; GFX10-NEXT: v_cndmask_b32_e64 v24, v68, 0, s15
+; GFX10-NEXT: v_cndmask_b32_e64 v25, v69, 0x7ff80000, s15
+; GFX10-NEXT: s_waitcnt vmcnt(5)
+; GFX10-NEXT: v_max_f64 v[70:71], v[28:29], v[2:3]
+; GFX10-NEXT: v_cmp_u_f64_e64 s17, v[28:29], v[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(3)
+; GFX10-NEXT: v_max_f64 v[66:67], v[26:27], v[4:5]
+; GFX10-NEXT: v_cmp_u_f64_e64 s16, v[26:27], v[4:5]
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v82, 0, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_o_f64_e64 s18, v[30:31], v[86:87]
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v54, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v54, v53, v5, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v55, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[50:51], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v55, v51, v7, s9
-; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[8:9], v[48:49]
-; GFX10-NEXT: v_cndmask_b32_e64 v101, 0x7ff80000, v54, s8
-; GFX10-NEXT: v_cmp_gt_f64_e64 s7, v[16:17], v[32:33]
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v102, v6, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v103, 0x7ff80000, v55, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v100, v4, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v101, v5, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[8:9], 64
-; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[10:11], v[38:39]
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v103, v7, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[48:49], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v114, v38, v10, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v115, v34, v14, s6
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[16:17], v[32:33]
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v52, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v53, s14
-; GFX10-NEXT: v_cmp_gt_f64_e64 s14, v[18:19], v[82:83]
-; GFX10-NEXT: v_cndmask_b32_e64 v52, 0, v115, s16
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v50, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v50, v49, v9, s13
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v51, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[38:39], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v54, 0, v112, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v51, v39, v11, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v55, 0x7ff80000, v50, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v50, 0, v113, s5
-; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[18:19], v[82:83]
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v54, v8, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v64, 0, v114, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v55, v9, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[12:13], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v65, 0x7ff80000, v51, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v48, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v64, v10, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v49, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[14:15], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v65, v11, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v48, v37, v13, s15
-; GFX10-NEXT: v_cmp_class_f64_e64 s17, v[34:35], 64
-; GFX10-NEXT: v_cmp_gt_f64_e64 s9, v[20:21], v[66:67]
-; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[20:21], v[66:67]
-; GFX10-NEXT: v_cndmask_b32_e64 v116, v32, v16, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v51, 0x7ff80000, v48, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v38, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v39, vcc_lo
-; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[22:23], v[68:69]
-; GFX10-NEXT: v_cndmask_b32_e64 v38, v35, v15, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v49, v82, v18, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v48, 0, v116, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s13, v[36:37], 64
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[22:23], v[68:69]
-; GFX10-NEXT: v_cndmask_b32_e64 v53, 0x7ff80000, v38, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v50, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v51, v13, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[16:17], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v38, 0, v49, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v112, v83, v19, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v52, v14, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v53, v15, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[32:33], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[18:19], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v114, v67, v21, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v34, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v35, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v34, v33, v17, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v39, 0x7ff80000, v112, s4
-; GFX10-NEXT: v_cmp_gt_f64_e64 s4, v[24:25], v[70:71]
-; GFX10-NEXT: v_cndmask_b32_e32 v113, v69, v23, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v35, v68, v22, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[20:21], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v49, 0x7ff80000, v34, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v34, 0x7ff80000, v114, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v36, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v37, s13
-; GFX10-NEXT: v_cmp_class_f64_e64 s13, v[82:83], 64
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[24:25], v[70:71]
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v48, v16, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v49, v17, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v36, 0x7ff80000, v113, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v35, 0, v35, s5
-; GFX10-NEXT: v_cmp_gt_f64_e64 s7, v[26:27], v[80:81]
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, v32, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v32, v66, v20, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v17, v33, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v38, v18, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v39, v19, s14
-; GFX10-NEXT: v_cmp_o_f64_e64 s15, v[26:27], v[80:81]
-; GFX10-NEXT: v_cndmask_b32_e64 v33, 0, v32, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v112, v71, v25, s4
-; GFX10-NEXT: v_cmp_gt_f64_e64 s16, v[28:29], v[84:85]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[28:29], v[84:85]
-; GFX10-NEXT: v_cndmask_b32_e32 v21, v34, v21, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v20, v33, v20, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[22:23], 64
-; GFX10-NEXT: v_cmp_gt_f64_e64 s17, v[30:31], v[86:87]
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[70:71], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v18, v82, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v82, v70, v24, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v19, v83, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v83, 0x7ff80000, v112, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[68:69], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[96:97]
-; GFX10-NEXT: v_cndmask_b32_e64 v82, 0, v82, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v37, v81, v27, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v32, v80, v26, s7
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[80:81], 64
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[84:85], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[98:99]
-; GFX10-NEXT: v_cndmask_b32_e64 v113, 0x7ff80000, v37, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v112, 0, v32, s15
-; GFX10-NEXT: v_cmp_eq_f64_e64 s11, 0, v[100:101]
-; GFX10-NEXT: v_cndmask_b32_e64 v115, v85, v29, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v114, v84, v28, s16
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, 0, v[102:103]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s13, 0, v[54:55]
-; GFX10-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[24:25], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v115, 0x7ff80000, v115, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v114, 0, v114, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v116, v87, v31, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v32, v86, v30, s17
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[86:87], 64
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, 0, v[64:65]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s15, 0, v[50:51]
-; GFX10-NEXT: v_cndmask_b32_e64 v117, 0x7ff80000, v116, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v116, 0, v32, s18
-; GFX10-NEXT: v_cmp_eq_f64_e64 s16, 0, v[52:53]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s17, 0, v[48:49]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s18, 0, v[38:39]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s19, 0, v[33:34]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s20, 0, v[35:36]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s21, 0, v[82:83]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s22, 0, v[112:113]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s23, 0, v[114:115]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s24, 0, v[116:117]
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v22, v68, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v23, v69, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v96, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e32 v24, v82, v24, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v25, v83, v25, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[26:27], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v98, v2, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v100, v4, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v24, v24, v70, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v25, v25, v71, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v102, v6, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v54, v8, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v64, v10, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v50, v12, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v52, v14, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v48, v16, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v38, v18, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v35, v22, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v24, v82, v24, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v97, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v99, v3, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v101, v5, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v103, v7, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v55, v9, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v65, v11, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v51, v13, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v53, v15, s16
-; GFX10-NEXT: v_cndmask_b32_e32 v26, v112, v26, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v27, v113, v27, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[28:29], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v49, v17, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v39, v19, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v26, v26, v80, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v27, v27, v81, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v36, v23, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v25, v83, v25, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v26, v112, v26, s22
-; GFX10-NEXT: v_cndmask_b32_e64 v27, v113, v27, s22
-; GFX10-NEXT: v_cndmask_b32_e32 v28, v114, v28, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v29, v115, v29, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[30:31], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v28, v28, v84, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v29, v29, v85, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v28, v114, v28, s23
-; GFX10-NEXT: v_cndmask_b32_e64 v29, v115, v29, s23
-; GFX10-NEXT: v_cndmask_b32_e32 v30, v116, v30, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v31, v117, v31, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[66:67], 64
-; GFX10-NEXT: v_cndmask_b32_e64 v30, v30, v86, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v31, v31, v87, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v30, v116, v30, s24
-; GFX10-NEXT: v_cndmask_b32_e64 v31, v117, v31, s24
-; GFX10-NEXT: v_cndmask_b32_e32 v20, v20, v66, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v21, v21, v67, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v20, v33, v20, s19
-; GFX10-NEXT: v_cndmask_b32_e64 v21, v34, v21, s19
+; GFX10-NEXT: v_max_f64 v[80:81], v[30:31], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e64 s18, v[30:31], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v83, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v84, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v85, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v32, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v33, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v28, v70, 0, s17
+; GFX10-NEXT: v_cndmask_b32_e64 v29, v71, 0x7ff80000, s17
+; GFX10-NEXT: v_cndmask_b32_e64 v26, v66, 0, s16
+; GFX10-NEXT: v_cndmask_b32_e64 v27, v67, 0x7ff80000, s16
+; GFX10-NEXT: v_cndmask_b32_e64 v30, v80, 0, s18
+; GFX10-NEXT: v_cndmask_b32_e64 v31, v81, 0x7ff80000, s18
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_maximum_v16f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1f
-; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:8
-; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:4
-; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:16
-; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:12
-; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:24
-; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:20
-; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:32
-; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:28
-; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:40
-; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:36
-; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:48
-; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:44
-; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:56
-; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:52
-; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:64
-; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:60
-; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:72
-; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:68
-; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:80
-; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:76
-; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:88
-; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:84
-; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:96
-; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:92
-; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:104
-; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:100
-; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:112
-; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:108
-; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:120
-; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:116
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:128
-; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:124
-; GFX11-NEXT: s_waitcnt vmcnt(31)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s9, v[0:1], v[86:87]
-; GFX11-NEXT: v_cmp_o_f64_e64 s11, v[0:1], v[86:87]
-; GFX11-NEXT: s_waitcnt vmcnt(29)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s10, v[2:3], v[84:85]
-; GFX11-NEXT: v_cmp_class_f64_e64 s14, v[86:87], 64
-; GFX11-NEXT: s_waitcnt vmcnt(27)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s0, v[4:5], v[32:33]
-; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[4:5], v[32:33]
-; GFX11-NEXT: s_waitcnt vmcnt(25)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s2, v[6:7], v[34:35]
-; GFX11-NEXT: v_cmp_o_f64_e64 s12, v[2:3], v[84:85]
-; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[6:7], v[34:35]
-; GFX11-NEXT: s_waitcnt vmcnt(23)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s4, v[8:9], v[36:37]
-; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[8:9], v[36:37]
-; GFX11-NEXT: v_cmp_class_f64_e64 s16, v[84:85], 64
-; GFX11-NEXT: s_waitcnt vmcnt(21)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s6, v[10:11], v[38:39]
-; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[10:11], v[38:39]
-; GFX11-NEXT: s_waitcnt vmcnt(19)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s8, v[12:13], v[48:49]
-; GFX11-NEXT: v_cmp_o_f64_e64 s7, v[12:13], v[48:49]
-; GFX11-NEXT: s_waitcnt vmcnt(17)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s13, v[14:15], v[50:51]
-; GFX11-NEXT: s_waitcnt vmcnt(15)
-; GFX11-NEXT: v_cmp_o_f64_e64 s15, v[16:17], v[52:53]
-; GFX11-NEXT: s_waitcnt vmcnt(13)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s17, v[18:19], v[54:55]
-; GFX11-NEXT: v_cmp_o_f64_e64 s18, v[18:19], v[54:55]
-; GFX11-NEXT: s_waitcnt vmcnt(11)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s19, v[20:21], v[64:65]
-; GFX11-NEXT: v_cmp_o_f64_e64 s20, v[20:21], v[64:65]
-; GFX11-NEXT: s_waitcnt vmcnt(9)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s21, v[22:23], v[66:67]
-; GFX11-NEXT: v_cmp_o_f64_e64 s22, v[22:23], v[66:67]
-; GFX11-NEXT: s_waitcnt vmcnt(7)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s23, v[24:25], v[68:69]
-; GFX11-NEXT: v_cmp_o_f64_e64 s24, v[24:25], v[68:69]
-; GFX11-NEXT: s_waitcnt vmcnt(5)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s25, v[26:27], v[70:71]
-; GFX11-NEXT: v_cmp_o_f64_e64 s26, v[26:27], v[70:71]
-; GFX11-NEXT: s_waitcnt vmcnt(3)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s27, v[28:29], v[80:81]
-; GFX11-NEXT: v_cmp_o_f64_e64 s28, v[28:29], v[80:81]
+; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
+; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
+; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
+; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
+; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:24
+; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:20
+; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:32
+; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:28
+; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:40
+; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:36
+; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:48
+; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:44
+; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:56
+; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:52
+; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:64
+; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:60
+; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:72
+; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:68
+; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:80
+; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:76
+; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:88
+; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:84
+; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:96
+; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:92
+; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:104
+; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:100
+; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:112
+; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:108
+; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:120
+; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:116
+; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:128
+; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:124
+; GFX11-NEXT: s_waitcnt vmcnt(30)
+; GFX11-NEXT: v_max_f64 v[96:97], v[0:1], v[32:33]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[32:33]
+; GFX11-NEXT: s_waitcnt vmcnt(28)
+; GFX11-NEXT: v_max_f64 v[32:33], v[2:3], v[34:35]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[34:35]
+; GFX11-NEXT: s_waitcnt vmcnt(26)
+; GFX11-NEXT: v_max_f64 v[34:35], v[4:5], v[36:37]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[36:37]
+; GFX11-NEXT: s_waitcnt vmcnt(24)
+; GFX11-NEXT: v_max_f64 v[36:37], v[6:7], v[38:39]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[38:39]
+; GFX11-NEXT: s_waitcnt vmcnt(22)
+; GFX11-NEXT: v_max_f64 v[38:39], v[8:9], v[48:49]
+; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[48:49]
+; GFX11-NEXT: s_waitcnt vmcnt(20)
+; GFX11-NEXT: v_max_f64 v[48:49], v[10:11], v[50:51]
+; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[50:51]
+; GFX11-NEXT: s_waitcnt vmcnt(18)
+; GFX11-NEXT: v_max_f64 v[50:51], v[12:13], v[52:53]
+; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[52:53]
+; GFX11-NEXT: s_waitcnt vmcnt(16)
+; GFX11-NEXT: v_max_f64 v[52:53], v[14:15], v[54:55]
+; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[54:55]
+; GFX11-NEXT: s_waitcnt vmcnt(14)
+; GFX11-NEXT: v_max_f64 v[54:55], v[16:17], v[64:65]
+; GFX11-NEXT: v_cmp_u_f64_e64 s7, v[16:17], v[64:65]
+; GFX11-NEXT: s_waitcnt vmcnt(12)
+; GFX11-NEXT: v_max_f64 v[64:65], v[18:19], v[66:67]
+; GFX11-NEXT: v_cmp_u_f64_e64 s8, v[18:19], v[66:67]
+; GFX11-NEXT: s_waitcnt vmcnt(10)
+; GFX11-NEXT: v_max_f64 v[66:67], v[20:21], v[68:69]
+; GFX11-NEXT: v_cmp_u_f64_e64 s9, v[20:21], v[68:69]
+; GFX11-NEXT: s_waitcnt vmcnt(8)
+; GFX11-NEXT: v_max_f64 v[68:69], v[22:23], v[70:71]
+; GFX11-NEXT: v_cmp_u_f64_e64 s10, v[22:23], v[70:71]
+; GFX11-NEXT: s_waitcnt vmcnt(6)
+; GFX11-NEXT: v_max_f64 v[70:71], v[24:25], v[80:81]
+; GFX11-NEXT: v_cmp_u_f64_e64 s11, v[24:25], v[80:81]
+; GFX11-NEXT: s_waitcnt vmcnt(4)
+; GFX11-NEXT: v_max_f64 v[80:81], v[26:27], v[82:83]
+; GFX11-NEXT: v_cmp_u_f64_e64 s12, v[26:27], v[82:83]
+; GFX11-NEXT: s_waitcnt vmcnt(2)
+; GFX11-NEXT: v_max_f64 v[82:83], v[28:29], v[84:85]
+; GFX11-NEXT: v_cmp_u_f64_e64 s13, v[28:29], v[84:85]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f64_e64 s29, v[30:31], v[82:83]
-; GFX11-NEXT: v_cmp_o_f64_e64 vcc_hi, v[30:31], v[82:83]
-; GFX11-NEXT: v_cndmask_b32_e64 v96, v87, v1, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v101, v86, v0, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v98, v85, v3, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v103, v84, v2, s10
-; GFX11-NEXT: v_cmp_class_f64_e64 s10, v[0:1], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v97, 0x7ff80000, v96, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v96, 0, v101, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v100, v33, v5, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v102, v35, v7, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v99, 0x7ff80000, v98, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v98, 0, v103, s12
-; GFX11-NEXT: v_cmp_class_f64_e64 s11, v[2:3], 64
-; GFX11-NEXT: v_cndmask_b32_e32 v101, 0x7ff80000, v100, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v103, 0x7ff80000, v102, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v112, v37, v9, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v114, v39, v11, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v116, v49, v13, s8
-; GFX11-NEXT: v_cmp_o_f64_e64 s9, v[14:15], v[50:51]
-; GFX11-NEXT: v_cndmask_b32_e64 v118, v51, v15, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v113, 0x7ff80000, v112, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v115, 0x7ff80000, v114, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v117, 0x7ff80000, v116, s7
-; GFX11-NEXT: v_cmp_gt_f64_e64 s12, v[16:17], v[52:53]
-; GFX11-NEXT: v_cndmask_b32_e64 v130, v55, v19, s17
-; GFX11-NEXT: v_cndmask_b32_e64 v132, v65, v21, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v134, v67, v23, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v144, v69, v25, s23
-; GFX11-NEXT: v_cndmask_b32_e64 v145, v71, v27, s25
-; GFX11-NEXT: v_cndmask_b32_e64 v131, 0x7ff80000, v130, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v133, 0x7ff80000, v132, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v135, 0x7ff80000, v134, s22
-; GFX11-NEXT: v_cndmask_b32_e64 v146, v81, v29, s27
-; GFX11-NEXT: v_cndmask_b32_e64 v148, v80, v28, s27
-; GFX11-NEXT: v_cndmask_b32_e64 v147, v83, v31, s29
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v147, 0x7ff80000, v147, vcc_hi
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, v0, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, v1, s10
-; GFX11-NEXT: v_cmp_class_f64_e64 s10, v[36:37], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v86, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v32, v4, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v87, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v34, v6, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v98, v2, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v99, v3, s11
-; GFX11-NEXT: v_cndmask_b32_e32 v100, 0, v86, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[4:5], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v102, 0, v87, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v84, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v84, v36, v8, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v38, v10, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v48, v12, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v119, 0x7ff80000, v118, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v128, v53, v17, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v112, 0, v84, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v114, 0, v86, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v116, 0, v87, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v84, v50, v14, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v129, 0x7ff80000, v128, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v52, v16, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v54, v18, s17
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v85, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v118, 0, v84, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v84, v64, v20, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v128, 0, v86, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v130, 0, v87, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v66, v22, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v85, 0x7ff80000, v144, s24
-; GFX11-NEXT: v_cndmask_b32_e64 v132, 0, v84, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v68, v24, s23
-; GFX11-NEXT: v_cndmask_b32_e64 v144, v70, v26, s25
-; GFX11-NEXT: v_cndmask_b32_e64 v134, 0, v86, s22
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[68:69], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[70:71], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v84, 0, v87, s24
-; GFX11-NEXT: v_cndmask_b32_e64 v87, 0x7ff80000, v145, s26
-; GFX11-NEXT: v_cndmask_b32_e64 v86, 0, v144, s26
-; GFX11-NEXT: v_cndmask_b32_e64 v145, 0x7ff80000, v146, s28
-; GFX11-NEXT: v_cndmask_b32_e64 v144, 0, v148, s28
-; GFX11-NEXT: v_cndmask_b32_e64 v146, v82, v30, s29
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[80:81], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[82:83], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[32:33], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s8, v[34:35], 64
-; GFX11-NEXT: v_dual_cndmask_b32 v5, v101, v5 :: v_dual_cndmask_b32 v4, v100, v4
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[6:7], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v146, 0, v146, vcc_hi
-; GFX11-NEXT: v_cmp_class_f64_e64 s12, v[38:39], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s14, v[48:49], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s16, v[50:51], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s18, v[52:53], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s20, v[54:55], 64
-; GFX11-NEXT: v_cmp_class_f64_e64 s21, v[64:65], 64
-; GFX11-NEXT: v_cmp_eq_f64_e64 s4, 0, v[96:97]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s5, 0, v[98:99]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[100:101]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s9, 0, v[102:103]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s11, 0, v[112:113]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s13, 0, v[114:115]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s15, 0, v[116:117]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s17, 0, v[118:119]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s19, 0, v[128:129]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s22, 0, v[130:131]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s23, 0, v[132:133]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s24, 0, v[134:135]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s25, 0, v[84:85]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s26, 0, v[86:87]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s27, 0, v[144:145]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s28, 0, v[146:147]
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v33, s6
-; GFX11-NEXT: v_dual_cndmask_b32 v7, v103, v7 :: v_dual_cndmask_b32 v6, v102, v6
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[8:9], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v32, s6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v35, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, v0, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v98, v2, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v100, v4, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, v1, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v99, v3, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v101, v5, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v103, v7, s9
-; GFX11-NEXT: v_dual_cndmask_b32 v9, v113, v9 :: v_dual_cndmask_b32 v8, v112, v8
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[10:11], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v34, s8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v37, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v102, v6, s9
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v113, v9, s11
-; GFX11-NEXT: v_dual_cndmask_b32 v11, v115, v11 :: v_dual_cndmask_b32 v10, v114, v10
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[12:13], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v36, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v39, s12
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v112, v8, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v115, v11, s13
-; GFX11-NEXT: v_dual_cndmask_b32 v13, v117, v13 :: v_dual_cndmask_b32 v12, v116, v12
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[14:15], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v38, s12
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v49, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v114, v10, s13
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v117, v13, s15
-; GFX11-NEXT: v_dual_cndmask_b32 v15, v119, v15 :: v_dual_cndmask_b32 v14, v118, v14
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[16:17], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, v48, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, v51, s16
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v116, v12, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v119, v15, s17
-; GFX11-NEXT: v_dual_cndmask_b32 v17, v129, v17 :: v_dual_cndmask_b32 v16, v128, v16
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[18:19], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v50, s16
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v17, v17, v53, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v118, v14, s17
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v17, v129, v17, s19
-; GFX11-NEXT: v_dual_cndmask_b32 v19, v131, v19 :: v_dual_cndmask_b32 v18, v130, v18
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[20:21], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v16, v52, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v19, v55, s20
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v128, v16, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v131, v19, s22
-; GFX11-NEXT: v_dual_cndmask_b32 v21, v133, v21 :: v_dual_cndmask_b32 v20, v132, v20
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[22:23], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v18, v54, s20
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v21, v21, v65, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v130, v18, s22
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v21, v133, v21, s23
-; GFX11-NEXT: v_dual_cndmask_b32 v23, v135, v23 :: v_dual_cndmask_b32 v22, v134, v22
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[24:25], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v20, v64, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v132, v20, s23
-; GFX11-NEXT: v_dual_cndmask_b32 v25, v85, v25 :: v_dual_cndmask_b32 v24, v84, v24
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[26:27], 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v25, v25, v69, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v25, v85, v25, s25
-; GFX11-NEXT: v_dual_cndmask_b32 v27, v87, v27 :: v_dual_cndmask_b32 v26, v86, v26
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[28:29], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v24, v24, v68, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v27, v27, v71, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v24, v84, v24, s25
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v27, v87, v27, s26
-; GFX11-NEXT: v_dual_cndmask_b32 v29, v145, v29 :: v_dual_cndmask_b32 v28, v144, v28
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[30:31], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v26, v26, v70, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v29, v29, v81, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v26, v86, v26, s26
-; GFX11-NEXT: v_cndmask_b32_e64 v29, v145, v29, s27
-; GFX11-NEXT: v_dual_cndmask_b32 v31, v147, v31 :: v_dual_cndmask_b32 v30, v146, v30
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[66:67], 64
-; GFX11-NEXT: v_cndmask_b32_e64 v28, v28, v80, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v31, v31, v83, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v28, v144, v28, s27
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v31, v147, v31, s28
-; GFX11-NEXT: v_dual_cndmask_b32 v23, v23, v67 :: v_dual_cndmask_b32 v22, v22, v66
-; GFX11-NEXT: v_cndmask_b32_e64 v30, v30, v82, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v23, v135, v23, s24
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v22, v134, v22, s24
-; GFX11-NEXT: v_cndmask_b32_e64 v30, v146, v30, s28
+; GFX11-NEXT: v_max_f64 v[84:85], v[30:31], v[86:87]
+; GFX11-NEXT: v_cmp_u_f64_e64 s14, v[30:31], v[86:87]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v32, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v33, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v34, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v35, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v36, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v37, 0x7ff80000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v8, v38, 0, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v9, v39, 0x7ff80000, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v10, v48, 0, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v11, v49, 0x7ff80000, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v12, v50, 0, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v13, v51, 0x7ff80000, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v14, v52, 0, s6
+; GFX11-NEXT: v_cndmask_b32_e64 v15, v53, 0x7ff80000, s6
+; GFX11-NEXT: v_cndmask_b32_e64 v16, v54, 0, s7
+; GFX11-NEXT: v_cndmask_b32_e64 v17, v55, 0x7ff80000, s7
+; GFX11-NEXT: v_cndmask_b32_e64 v18, v64, 0, s8
+; GFX11-NEXT: v_cndmask_b32_e64 v19, v65, 0x7ff80000, s8
+; GFX11-NEXT: v_cndmask_b32_e64 v20, v66, 0, s9
+; GFX11-NEXT: v_cndmask_b32_e64 v21, v67, 0x7ff80000, s9
+; GFX11-NEXT: v_cndmask_b32_e64 v22, v68, 0, s10
+; GFX11-NEXT: v_cndmask_b32_e64 v23, v69, 0x7ff80000, s10
+; GFX11-NEXT: v_cndmask_b32_e64 v24, v70, 0, s11
+; GFX11-NEXT: v_cndmask_b32_e64 v25, v71, 0x7ff80000, s11
+; GFX11-NEXT: v_cndmask_b32_e64 v26, v80, 0, s12
+; GFX11-NEXT: v_cndmask_b32_e64 v27, v81, 0x7ff80000, s12
+; GFX11-NEXT: v_cndmask_b32_e64 v28, v82, 0, s13
+; GFX11-NEXT: v_cndmask_b32_e64 v29, v83, 0x7ff80000, s13
+; GFX11-NEXT: v_cndmask_b32_e64 v30, v84, 0, s14
+; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_maximum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
index 95d351e8f1fa..e00ebff751c7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
@@ -443,28 +443,14 @@ define <2 x half> @v_minimum_v2f16(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
+; GFX8-NEXT: v_min_f16_e32 v4, v3, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
+; GFX8-NEXT: v_min_f16_e32 v3, v0, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -543,26 +529,9 @@ define <2 x half> @v_minimum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX8-LABEL: v_minimum_v2f16__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
-; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f16__nnan:
@@ -608,13 +577,11 @@ define <2 x half> @v_minimum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
+; GFX8-NEXT: v_min_f16_e32 v4, v3, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
+; GFX8-NEXT: v_min_f16_e32 v3, v0, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
@@ -696,14 +663,9 @@ define <2 x half> @v_minimum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
; GFX8-LABEL: v_minimum_v2f16__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1
-; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f16__nnan_nsz:
@@ -750,31 +712,15 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
; GFX8-NEXT: s_lshr_b32 s6, s5, 16
; GFX8-NEXT: s_lshr_b32 s7, s4, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s6
-; GFX8-NEXT: v_mov_b32_e32 v1, s7
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, s7, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
+; GFX8-NEXT: v_min_f16_e32 v1, s7, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s7, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s7, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s6, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v2
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX8-NEXT: v_mov_b32_e32 v1, s5
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: v_cmp_lt_f16_e32 vcc, s4, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v1, v2, vcc
+; GFX8-NEXT: v_min_f16_e32 v3, s4, v1
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s4, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
index 1da2647fbd60..e056682051aa 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
@@ -495,167 +495,73 @@ define <2 x float> @v_minimum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX7-LABEL: v_minimum_v2f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v4, v0, v2
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v2, v1, v3
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX7-NEXT: v_min_f32_e32 v2, v1, v3
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX8-NEXT: v_min_f32_e32 v2, v1, v3
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v1, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX940-NEXT: v_min_f32_e32 v4, v0, v2
; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v4, v0, v2
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v5, v1, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v4, v0, v2 :: v_dual_min_f32 v5, v1, v3
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f32:
@@ -676,136 +582,42 @@ define <2 x float> @v_minimum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX7-LABEL: v_minimum_v2f32__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v0, v2
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v2, v1, v3
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f32__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f32__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f32__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX940-NEXT: v_min_f32_e32 v1, v1, v3
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_min_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f32__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f32__nnan:
@@ -826,11 +638,11 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX7-LABEL: v_minimum_v2f32__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v4, v0, v2
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v2, v1, v3
+; GFX7-NEXT: v_min_f32_e32 v2, v1, v3
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -838,13 +650,11 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX8-LABEL: v_minimum_v2f32__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX8-NEXT: v_min_f32_e32 v2, v1, v3
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -852,13 +662,11 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX9-LABEL: v_minimum_v2f32__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v2, v1, v3
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -866,16 +674,12 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX940-LABEL: v_minimum_v2f32__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX940-NEXT: v_min_f32_e32 v4, v0, v2
; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_min_f32_e32 v2, v1, v3
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
@@ -884,11 +688,9 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX10-LABEL: v_minimum_v2f32__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v4, v0, v2
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
+; GFX10-NEXT: v_min_f32_e32 v5, v1, v3
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
@@ -897,12 +699,9 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX11-LABEL: v_minimum_v2f32__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v4, v0, v2 :: v_dual_min_f32 v5, v1, v3
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
@@ -926,55 +725,42 @@ define <2 x float> @v_minimum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX7-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX940-NEXT: v_min_f32_e32 v1, v1, v3
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_min_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f32__nnan_nsz:
@@ -996,28 +782,14 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s7
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, s5, v0
+; GFX7-NEXT: v_min_f32_e32 v1, s5, v0
; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s5, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX7-NEXT: v_mov_b32_e32 v3, s5
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s7, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX7-NEXT: v_mov_b32_e32 v0, s6
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, s4, v0
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT: v_min_f32_e32 v3, s4, v0
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s4, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX7-NEXT: v_mov_b32_e32 v3, s4
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v2, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, s6, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use v[0:1]
; GFX7-NEXT: ;;#ASMEND
@@ -1027,30 +799,14 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s7
-; GFX8-NEXT: v_mov_b32_e32 v1, s5
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX8-NEXT: v_min_f32_e32 v1, s5, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s5, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s7, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
; GFX8-NEXT: v_mov_b32_e32 v0, s6
-; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: v_min_f32_e32 v3, s4, v0
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s4, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, s6, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
@@ -1060,30 +816,14 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s7
-; GFX9-NEXT: v_mov_b32_e32 v1, s5
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000
+; GFX9-NEXT: v_min_f32_e32 v1, s5, v0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s5, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
; GFX9-NEXT: v_mov_b32_e32 v0, s6
-; GFX9-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, s4, v0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s4, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, s6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v[0:1]
; GFX9-NEXT: ;;#ASMEND
@@ -1093,40 +833,15 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v0, s3
-; GFX940-NEXT: v_mov_b32_e32 v1, s1
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0
-; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v1, s1, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s1, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc
; GFX940-NEXT: v_mov_b32_e32 v0, s2
-; GFX940-NEXT: v_mov_b32_e32 v2, s0
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, s0, v0
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, s2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use v[0:1]
; GFX940-NEXT: ;;#ASMEND
@@ -1135,28 +850,12 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX10-LABEL: s_minimum_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s5
-; GFX10-NEXT: v_cmp_lt_f32_e64 vcc_lo, s5, s7
-; GFX10-NEXT: v_mov_b32_e32 v1, s4
-; GFX10-NEXT: v_cmp_class_f32_e64 s8, s5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, s7, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e64 vcc_lo, s4, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo
+; GFX10-NEXT: v_min_f32_e64 v0, s5, s7
; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s5, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX10-NEXT: v_min_f32_e64 v2, s4, s6
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s4, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v0, s5, s8
-; GFX10-NEXT: v_cmp_class_f32_e64 s5, s4, 32
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v2, s4, s5
-; GFX10-NEXT: v_cmp_class_f32_e64 s4, s7, 32
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s7, s4
-; GFX10-NEXT: v_cmp_class_f32_e64 s4, s6, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s6, s4
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use v[0:1]
; GFX10-NEXT: ;;#ASMEND
@@ -1165,32 +864,13 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX11-LABEL: s_minimum_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
-; GFX11-NEXT: v_cmp_lt_f32_e64 vcc_lo, s1, s3
-; GFX11-NEXT: v_cmp_class_f32_e64 s4, s1, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, s3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e64 vcc_lo, s0, s2
-; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo
+; GFX11-NEXT: v_min_f32_e64 v0, s1, s3
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s1, s3
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX11-NEXT: v_min_f32_e64 v2, s0, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v0, s1, s4
-; GFX11-NEXT: v_cmp_class_f32_e64 s1, s0, 32
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v2, s0, s1
-; GFX11-NEXT: v_cmp_class_f32_e64 s0, s3, 32
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s3, s0
-; GFX11-NEXT: v_cmp_class_f32_e64 s0, s2, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s2, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use v[0:1]
; GFX11-NEXT: ;;#ASMEND
@@ -1218,227 +898,92 @@ define <3 x float> @v_minimum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX7-LABEL: v_minimum_v3f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v6, v0, v3
+; GFX7-NEXT: v_min_f32_e32 v6, v0, v3
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v1, v4
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX7-NEXT: v_min_f32_e32 v3, v1, v4
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v2, v5
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX7-NEXT: v_min_f32_e32 v3, v2, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v6, v0, v3
; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX8-NEXT: v_min_f32_e32 v3, v1, v4
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX8-NEXT: v_min_f32_e32 v3, v2, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, v0, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; GFX940-NEXT: v_min_f32_e32 v6, v0, v3
; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, v2, v5
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v6, v0, v3
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v6, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v7, v1, v4
+; GFX10-NEXT: v_min_f32_e32 v8, v2, v5
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v7, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v6, v0, v3 :: v_dual_min_f32 v7, v1, v4
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v6, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v7, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f32:
@@ -1460,184 +1005,48 @@ define <3 x float> @v_minimum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX7-LABEL: v_minimum_v3f32__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v6, v0, v3
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v1, v4
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v2, v5
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f32__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX8-NEXT: v_min_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f32__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f32__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX940-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX940-NEXT: v_min_f32_e32 v2, v2, v5
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX10-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX10-NEXT: v_min_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f32__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4
+; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f32__nnan:
@@ -1659,14 +1068,14 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX7-LABEL: v_minimum_v3f32__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v6, v0, v3
+; GFX7-NEXT: v_min_f32_e32 v6, v0, v3
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v1, v4
+; GFX7-NEXT: v_min_f32_e32 v3, v1, v4
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v2, v5
+; GFX7-NEXT: v_min_f32_e32 v3, v2, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -1674,17 +1083,14 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX8-LABEL: v_minimum_v3f32__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v6, v0, v3
; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX8-NEXT: v_min_f32_e32 v3, v1, v4
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX8-NEXT: v_min_f32_e32 v3, v2, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -1692,17 +1098,14 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX9-LABEL: v_minimum_v3f32__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v6, v0, v3
; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v1, v4
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v3, v2, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -1710,22 +1113,16 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX940-LABEL: v_minimum_v3f32__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; GFX940-NEXT: v_min_f32_e32 v6, v0, v3
; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_min_f32_e32 v3, v1, v4
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v3, v2, v5
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
@@ -1734,13 +1131,10 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX10-LABEL: v_minimum_v3f32__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v6, v0, v3
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
+; GFX10-NEXT: v_min_f32_e32 v7, v1, v4
+; GFX10-NEXT: v_min_f32_e32 v8, v2, v5
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
@@ -1751,17 +1145,14 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX11-LABEL: v_minimum_v3f32__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v2, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v6, v0, v3 :: v_dual_min_f32 v7, v1, v4
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
@@ -1784,67 +1175,48 @@ define <3 x float> @v_minimum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX7-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v0, v3
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v1, v4
-; GFX7-NEXT: v_min_legacy_f32_e32 v2, v2, v5
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX8-NEXT: v_min_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX940-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX940-NEXT: v_min_f32_e32 v2, v2, v5
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX10-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX10-NEXT: v_min_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4
+; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f32__nnan_nsz:
@@ -1866,292 +1238,111 @@ define <4 x float> @v_minimum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX7-LABEL: v_minimum_v4f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v0, v4
+; GFX7-NEXT: v_min_f32_e32 v8, v0, v4
; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v1, v5
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v4, v1, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v2, v6
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX7-NEXT: v_min_f32_e32 v4, v2, v6
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v3, v7
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX7-NEXT: v_min_f32_e32 v4, v3, v7
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v0, v4
; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v1, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v2, v6
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v3, v7
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v0, v4
; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v2, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v3, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
+; GFX940-NEXT: v_min_f32_e32 v8, v0, v4
; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v4, v1, v5
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
+; GFX940-NEXT: v_min_f32_e32 v4, v2, v6
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
+; GFX940-NEXT: v_min_f32_e32 v4, v3, v7
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v8, v0, v4
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v9, v1, v5
+; GFX10-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v8, v3, v7
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v8, v0, v4 :: v_dual_min_f32 v9, v1, v5
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
+; GFX11-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX11-NEXT: v_dual_min_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v10, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f32:
@@ -2174,236 +1365,53 @@ define <4 x float> @v_minimum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX7-LABEL: v_minimum_v4f32__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v0, v4
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v1, v5
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v2, v6
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v3, v7
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f32__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX8-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX8-NEXT: v_min_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f32__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX9-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f32__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX940-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX940-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX940-NEXT: v_min_f32_e32 v3, v3, v7
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v7, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX10-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX10-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX10-NEXT: v_min_f32_e32 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f32__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v7, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5
+; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f32__nnan:
@@ -2426,17 +1434,17 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX7-LABEL: v_minimum_v4f32__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v0, v4
+; GFX7-NEXT: v_min_f32_e32 v8, v0, v4
; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v1, v5
+; GFX7-NEXT: v_min_f32_e32 v4, v1, v5
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v2, v6
+; GFX7-NEXT: v_min_f32_e32 v4, v2, v6
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v4, v3, v7
+; GFX7-NEXT: v_min_f32_e32 v4, v3, v7
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -2444,21 +1452,17 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX8-LABEL: v_minimum_v4f32__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v0, v4
; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v1, v5
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v2, v6
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX8-NEXT: v_min_f32_e32 v4, v3, v7
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -2466,21 +1470,17 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX9-LABEL: v_minimum_v4f32__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v0, v4
; GFX9-NEXT: v_mov_b32_e32 v9, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
; GFX9-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v1, v5
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v2, v6
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v4, v3, v7
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -2488,28 +1488,20 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX940-LABEL: v_minimum_v4f32__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
+; GFX940-NEXT: v_min_f32_e32 v8, v0, v4
; GFX940-NEXT: v_mov_b32_e32 v9, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_min_f32_e32 v4, v1, v5
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v4, v2, v6
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v4, v3, v7
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
@@ -2518,44 +1510,35 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX10-LABEL: v_minimum_v4f32__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v8, v0, v4
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
+; GFX10-NEXT: v_min_f32_e32 v9, v1, v5
+; GFX10-NEXT: v_min_f32_e32 v4, v2, v6
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v7, v3, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v4, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v8, v3, v7
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f32__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v8, v0, v4 :: v_dual_min_f32 v9, v1, v5
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v7, v3, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v4, vcc_lo
+; GFX11-NEXT: v_min_f32_e32 v4, v2, v6
+; GFX11-NEXT: v_dual_min_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f32__nsz:
@@ -2578,79 +1561,53 @@ define <4 x float> @v_minimum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX7-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v0, v4
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v1, v5
-; GFX7-NEXT: v_min_legacy_f32_e32 v2, v2, v6
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v3, v7
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX8-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX8-NEXT: v_min_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX9-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v5
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v6
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v3, v7
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX940-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX940-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX940-NEXT: v_min_f32_e32 v3, v3, v7
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX10-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX10-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX10-NEXT: v_min_f32_e32 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v6
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v7
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5
+; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f32__nnan_nsz:
@@ -2673,551 +1630,185 @@ define <8 x float> @v_minimum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX7-LABEL: v_minimum_v8f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v0, v8
+; GFX7-NEXT: v_min_f32_e32 v16, v0, v8
; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v1, v9
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v1, v9
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v2, v10
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v2, v10
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v3, v11
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v3, v11
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v4, v12
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v4, v12
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v5, v13
+; GFX7-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v5, v13
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v6, v14
+; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v6, v14
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v8, v7, v15
+; GFX7-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX7-NEXT: v_min_f32_e32 v8, v7, v15
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v8f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
+; GFX8-NEXT: v_min_f32_e32 v16, v0, v8
; GFX8-NEXT: v_mov_b32_e32 v17, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v9
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v1, v9
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v10
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v2, v10
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v11
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v3, v11
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v4, v12
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v4, v12
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v5, v13
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v5, v13
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v14
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v6, v14
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v7, v15
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX8-NEXT: v_min_f32_e32 v8, v7, v15
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v8f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
+; GFX9-NEXT: v_min_f32_e32 v16, v0, v8
; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v1, v9
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v2, v10
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v3, v11
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v4, v12
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v5, v13
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v6, v14
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX9-NEXT: v_min_f32_e32 v8, v7, v15
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v8f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v8
+; GFX940-NEXT: v_min_f32_e32 v16, v0, v8
; GFX940-NEXT: v_mov_b32_e32 v17, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v9
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v9, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v1, v9
+; GFX940-NEXT: s_nop 0
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v9
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v10
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v2, v10
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v10
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v3, v11
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v3, v11
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v11
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v4, v12
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v4, v12
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v12
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v5, v13
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v5, v13
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v13
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v6, v14
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v6, v14
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v14
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v7, v15
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc
+; GFX940-NEXT: v_min_f32_e32 v8, v7, v15
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v15
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v8f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v16, v0, v8
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v17, v1, v9
+; GFX10-NEXT: v_min_f32_e32 v8, v2, v10
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v17, 0x7fc00000, v17, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v17
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v10
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v11
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v11, v3, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v12
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v12, v4, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v9, v3, v11
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v10, v7, v15
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11
-; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v8, v4, v12
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12
-; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v13
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v14
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v14, v6, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v15
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v9, v5, v13
+; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13
-; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v8, v6, v14
+; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v9, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14
-; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15
-; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v10
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v8f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v16, v0, v8 :: v_dual_min_f32 v17, v1, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v17, 0x7fc00000, v17, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v17
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v10
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v11
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v11, v3, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v12
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v12, v4, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v9, v3, v11 :: v_dual_min_f32 v8, v2, v10
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT: v_min_f32_e32 v10, v7, v15
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11
-; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v8, v4, v12 :: v_dual_cndmask_b32 v3, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12
-; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v13
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v14
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v14, v6, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v15
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v9, v5, v13 :: v_dual_cndmask_b32 v4, 0x7fc00000, v8
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13
-; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v8, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_dual_min_f32 v8, v6, v14 :: v_dual_cndmask_b32 v5, 0x7fc00000, v9
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14
-; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v9, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15
-; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v10
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v8f32:
@@ -3244,1071 +1835,371 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-LABEL: v_minimum_v16f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v32, v0, v16
-; GFX7-NEXT: v_mov_b32_e32 v31, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v32, v31, v32, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v16, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v1, v17
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v16
+; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX7-NEXT: v_writelane_b32 v31, s30, 0
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX7-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v2, v18
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v18, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v3, v19
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v19, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v4, v20
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v20, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v5, v21
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v21, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v6, v22
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v22, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v7, v23
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v23, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v8, v24
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v24, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v9, v25
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v25, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v10, v26
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v26, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v11, v27
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v27, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v12, v28
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v28, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v13, v29
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v29, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v14, v30
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v30, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v17
+; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v18
+; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX7-NEXT: v_min_f32_e32 v18, v13, v29
+; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX7-NEXT: v_writelane_b32 v31, s31, 1
+; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v19
+; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX7-NEXT: v_min_f32_e32 v4, v4, v20
+; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX7-NEXT: v_min_f32_e32 v5, v5, v21
+; GFX7-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX7-NEXT: v_min_f32_e32 v6, v6, v22
+; GFX7-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX7-NEXT: v_min_f32_e32 v7, v7, v23
+; GFX7-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX7-NEXT: v_min_f32_e32 v8, v8, v24
+; GFX7-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX7-NEXT: v_min_f32_e32 v9, v9, v25
+; GFX7-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX7-NEXT: v_min_f32_e32 v10, v10, v26
+; GFX7-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX7-NEXT: v_min_f32_e32 v11, v11, v27
+; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX7-NEXT: v_min_f32_e32 v12, v12, v28
+; GFX7-NEXT: v_min_f32_e32 v19, v14, v30
+; GFX7-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX7-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX7-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX7-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX7-NEXT: v_readlane_b32 s31, v31, 1
+; GFX7-NEXT: v_readlane_b32 s30, v31, 0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_min_f32_e32 v18, v15, v16
+; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX7-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v16, v15, v17
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX7-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
-; GFX7-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v16f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc
-; GFX8-NEXT: v_mov_b32_e32 v31, 0x7fc00000
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v32, v31, v32, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v16, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v1, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX8-NEXT: v_min_f32_e32 v0, v0, v16
+; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX8-NEXT: v_writelane_b32 v31, s30, 0
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX8-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v2, v18
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v18, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v19
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v19, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v4, v20
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v20, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v5, v21
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v21, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v22
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v22, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v7, v23
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v23, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v8, v24
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v24, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v9, v25
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v25, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v10, v26
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v26, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v11, v27
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v27, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v12, v28
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v28, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v13, v29
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v29, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v14, v30
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v30, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
+; GFX8-NEXT: v_min_f32_e32 v1, v1, v17
+; GFX8-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX8-NEXT: v_min_f32_e32 v2, v2, v18
+; GFX8-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX8-NEXT: v_min_f32_e32 v18, v13, v29
+; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX8-NEXT: v_writelane_b32 v31, s31, 1
+; GFX8-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX8-NEXT: v_min_f32_e32 v3, v3, v19
+; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX8-NEXT: v_min_f32_e32 v4, v4, v20
+; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX8-NEXT: v_min_f32_e32 v5, v5, v21
+; GFX8-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX8-NEXT: v_min_f32_e32 v6, v6, v22
+; GFX8-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX8-NEXT: v_min_f32_e32 v7, v7, v23
+; GFX8-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX8-NEXT: v_min_f32_e32 v8, v8, v24
+; GFX8-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX8-NEXT: v_min_f32_e32 v9, v9, v25
+; GFX8-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX8-NEXT: v_min_f32_e32 v10, v10, v26
+; GFX8-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX8-NEXT: v_min_f32_e32 v11, v11, v27
+; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX8-NEXT: v_min_f32_e32 v12, v12, v28
+; GFX8-NEXT: v_min_f32_e32 v19, v14, v30
+; GFX8-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX8-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX8-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX8-NEXT: v_readlane_b32 s31, v31, 1
+; GFX8-NEXT: v_readlane_b32 s30, v31, 0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_min_f32_e32 v18, v15, v16
+; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX8-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v15, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX8-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
-; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v16f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc
-; GFX9-NEXT: v_mov_b32_e32 v31, 0x7fc00000
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v32, v31, v32, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v16, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v1, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: v_cmp_o_f32_e64 s[16:17], v0, v16
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v16
+; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX9-NEXT: v_writelane_b32 v31, s30, 0
; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v2, v18
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v18, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v3, v19
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v19, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v4, v20
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v20, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v5, v21
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v21, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v6, v22
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v22, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v7, v23
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v23, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v8, v24
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v24, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v9, v25
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v25, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v10, v26
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v26, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v11, v27
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v27, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v12, v28
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v28, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v13, v29
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v29, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v14, v30
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v30, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v17
+; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
+; GFX9-NEXT: v_min_f32_e32 v2, v2, v18
+; GFX9-NEXT: v_mov_b32_e32 v17, 0x7fc00000
+; GFX9-NEXT: v_min_f32_e32 v18, v13, v29
+; GFX9-NEXT: v_cmp_o_f32_e64 s[28:29], v13, v29
+; GFX9-NEXT: v_writelane_b32 v31, s31, 1
+; GFX9-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
+; GFX9-NEXT: v_min_f32_e32 v3, v3, v19
+; GFX9-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
+; GFX9-NEXT: v_min_f32_e32 v4, v4, v20
+; GFX9-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
+; GFX9-NEXT: v_min_f32_e32 v5, v5, v21
+; GFX9-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22
+; GFX9-NEXT: v_min_f32_e32 v6, v6, v22
+; GFX9-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23
+; GFX9-NEXT: v_min_f32_e32 v7, v7, v23
+; GFX9-NEXT: v_cmp_o_f32_e64 s[18:19], v8, v24
+; GFX9-NEXT: v_min_f32_e32 v8, v8, v24
+; GFX9-NEXT: v_cmp_o_f32_e64 s[20:21], v9, v25
+; GFX9-NEXT: v_min_f32_e32 v9, v9, v25
+; GFX9-NEXT: v_cmp_o_f32_e64 s[22:23], v10, v26
+; GFX9-NEXT: v_min_f32_e32 v10, v10, v26
+; GFX9-NEXT: v_cmp_o_f32_e64 s[24:25], v11, v27
+; GFX9-NEXT: v_min_f32_e32 v11, v11, v27
+; GFX9-NEXT: v_cmp_o_f32_e64 s[26:27], v12, v28
+; GFX9-NEXT: v_min_f32_e32 v12, v12, v28
+; GFX9-NEXT: v_min_f32_e32 v19, v14, v30
+; GFX9-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v18, s[28:29]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v17, v0, s[16:17]
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v17, v2, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v17, v4, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v17, v6, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[14:15]
+; GFX9-NEXT: v_cndmask_b32_e64 v8, v17, v8, s[18:19]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[20:21]
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v17, v10, s[22:23]
+; GFX9-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[24:25]
+; GFX9-NEXT: v_cndmask_b32_e64 v12, v17, v12, s[26:27]
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v17, v19, s[30:31]
+; GFX9-NEXT: v_readlane_b32 s31, v31, 1
+; GFX9-NEXT: v_readlane_b32 s30, v31, 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_min_f32_e32 v18, v15, v16
+; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v15, v16
+; GFX9-NEXT: v_cndmask_b32_e32 v15, v17, v18, vcc
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v15, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v31, v16, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v16f32:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v0, v16
; GFX940-NEXT: v_mov_b32_e32 v32, 0x7fc00000
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v16, v0, vcc
+; GFX940-NEXT: v_min_f32_e32 v33, v0, v16
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v32, v33, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v0, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v16, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v33
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v1, v17
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v17, v1, vcc
+; GFX940-NEXT: v_min_f32_e32 v34, v1, v17
+; GFX940-NEXT: v_min_f32_e32 v35, v2, v18
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v1, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v17, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v2, v18
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
+; GFX940-NEXT: v_min_f32_e32 v36, v3, v19
+; GFX940-NEXT: v_min_f32_e32 v37, v4, v20
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v18
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v2, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v18, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v3, v19
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
+; GFX940-NEXT: v_min_f32_e32 v38, v5, v21
+; GFX940-NEXT: v_min_f32_e32 v39, v6, v22
+; GFX940-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v3, v19
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v3, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v19, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v4, v20
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
+; GFX940-NEXT: v_min_f32_e32 v48, v7, v23
+; GFX940-NEXT: v_min_f32_e32 v49, v8, v24
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v20
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v4, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v20, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v5, v21
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
+; GFX940-NEXT: v_min_f32_e32 v50, v9, v25
+; GFX940-NEXT: v_min_f32_e32 v51, v10, v26
+; GFX940-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v21
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v5, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v21, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v6, v22
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
+; GFX940-NEXT: v_min_f32_e32 v52, v11, v27
+; GFX940-NEXT: v_min_f32_e32 v53, v12, v28
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v22
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v6, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v22, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v7, v23
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
+; GFX940-NEXT: v_min_f32_e32 v54, v13, v29
+; GFX940-NEXT: v_min_f32_e32 v55, v14, v30
+; GFX940-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v23
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v7, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v23, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v8, v24
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_min_f32_e32 v16, v15, v31
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v24
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v8, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v24, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v9, v25
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v9, v25
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v9, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v25, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v10, v26
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v10, v26
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v10, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v26, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v11, v27
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v11, v27
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v11, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v27, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v12, v28
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v12, v28
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v12, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v28, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v13, v29
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v13, v29
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v13, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v29, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v14, v30
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v14, v30
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v14, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v30, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f32_e32 vcc, v15, v31
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc
; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v15, v31
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v32, v16, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v15, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GFX940-NEXT: v_cmp_class_f32_e64 vcc, v31, 32
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc
-; GFX940-NEXT: v_cmp_eq_f32_e32 vcc, 0, v16
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v16f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v16
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v17
-; GFX10-NEXT: v_cndmask_b32_e32 v33, v17, v1, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v18
-; GFX10-NEXT: v_cndmask_b32_e32 v34, v18, v2, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v19
-; GFX10-NEXT: v_cndmask_b32_e32 v35, v19, v3, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v20
-; GFX10-NEXT: v_cndmask_b32_e32 v36, v20, v4, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v21
-; GFX10-NEXT: v_cndmask_b32_e32 v37, v21, v5, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v22
-; GFX10-NEXT: v_cndmask_b32_e32 v38, v22, v6, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v23
-; GFX10-NEXT: v_cndmask_b32_e32 v39, v23, v7, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v24
-; GFX10-NEXT: v_cndmask_b32_e32 v48, v24, v8, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v9, v25
-; GFX10-NEXT: v_cndmask_b32_e32 v49, v25, v9, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v10, v26
-; GFX10-NEXT: v_cndmask_b32_e32 v50, v26, v10, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v27
-; GFX10-NEXT: v_cndmask_b32_e32 v51, v27, v11, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v12, v28
-; GFX10-NEXT: v_cndmask_b32_e32 v52, v28, v12, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v13, v29
-; GFX10-NEXT: v_cndmask_b32_e32 v53, v29, v13, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v14, v30
-; GFX10-NEXT: v_cndmask_b32_e32 v54, v30, v14, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v32, v0, v16
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v32, 0x7fc00000, v32, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v33, v1, v17
+; GFX10-NEXT: v_min_f32_e32 v34, v2, v18
+; GFX10-NEXT: v_min_f32_e32 v35, v3, v19
+; GFX10-NEXT: v_min_f32_e32 v36, v4, v20
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17
-; GFX10-NEXT: v_cndmask_b32_e32 v33, 0x7fc00000, v33, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v37, v5, v21
+; GFX10-NEXT: v_min_f32_e32 v38, v6, v22
+; GFX10-NEXT: v_min_f32_e32 v39, v7, v23
+; GFX10-NEXT: v_min_f32_e32 v48, v8, v24
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v18
-; GFX10-NEXT: v_cndmask_b32_e32 v34, 0x7fc00000, v34, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v49, v9, v25
+; GFX10-NEXT: v_min_f32_e32 v50, v10, v26
+; GFX10-NEXT: v_min_f32_e32 v51, v11, v27
+; GFX10-NEXT: v_min_f32_e32 v52, v12, v28
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19
-; GFX10-NEXT: v_cndmask_b32_e32 v35, 0x7fc00000, v35, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v53, v13, v29
+; GFX10-NEXT: v_min_f32_e32 v54, v14, v30
+; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20
-; GFX10-NEXT: v_cndmask_b32_e32 v36, 0x7fc00000, v36, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21
-; GFX10-NEXT: v_cndmask_b32_e32 v37, 0x7fc00000, v37, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22
-; GFX10-NEXT: v_cndmask_b32_e32 v38, 0x7fc00000, v38, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23
-; GFX10-NEXT: v_cndmask_b32_e32 v39, 0x7fc00000, v39, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24
-; GFX10-NEXT: v_cndmask_b32_e32 v48, 0x7fc00000, v48, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25
-; GFX10-NEXT: v_cndmask_b32_e32 v49, 0x7fc00000, v49, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26
-; GFX10-NEXT: v_cndmask_b32_e32 v50, 0x7fc00000, v50, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27
-; GFX10-NEXT: v_cndmask_b32_e32 v51, 0x7fc00000, v51, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28
-; GFX10-NEXT: v_cndmask_b32_e32 v52, 0x7fc00000, v52, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29
-; GFX10-NEXT: v_cndmask_b32_e32 v53, 0x7fc00000, v53, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30
-; GFX10-NEXT: v_cndmask_b32_e32 v54, 0x7fc00000, v54, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v16, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v17, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v18, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v19, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v20, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v21, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v22, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v23, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v24, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v25, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v26, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v27, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v28, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v29, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v30, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v32
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v33
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v34
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v35
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v36
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v37
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v38
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v15, v31
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v39
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v48
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
+; GFX10-NEXT: v_min_f32_e32 v16, v15, v31
; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31
-; GFX10-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v49
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v50
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v51
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v52
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX10-NEXT: v_cmp_class_f32_e64 vcc_lo, v31, 32
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v53
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v54
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v16f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v16
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v17
-; GFX11-NEXT: v_cndmask_b32_e32 v33, v17, v1, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v18
-; GFX11-NEXT: v_cndmask_b32_e32 v34, v18, v2, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v19
-; GFX11-NEXT: v_cndmask_b32_e32 v35, v19, v3, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v20
-; GFX11-NEXT: v_cndmask_b32_e32 v36, v20, v4, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v21
-; GFX11-NEXT: v_cndmask_b32_e32 v37, v21, v5, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v22
-; GFX11-NEXT: v_cndmask_b32_e32 v38, v22, v6, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v23
-; GFX11-NEXT: v_cndmask_b32_e32 v39, v23, v7, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v24
-; GFX11-NEXT: v_cndmask_b32_e32 v48, v24, v8, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v9, v25
-; GFX11-NEXT: v_cndmask_b32_e32 v49, v25, v9, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v10, v26
-; GFX11-NEXT: v_cndmask_b32_e32 v50, v26, v10, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v27
-; GFX11-NEXT: v_cndmask_b32_e32 v51, v27, v11, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v12, v28
-; GFX11-NEXT: v_cndmask_b32_e32 v52, v28, v12, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v13, v29
-; GFX11-NEXT: v_cndmask_b32_e32 v53, v29, v13, vcc_lo
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v14, v30
-; GFX11-NEXT: v_cndmask_b32_e32 v54, v30, v14, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v32, v0, v16 :: v_dual_min_f32 v33, v1, v17
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16
-; GFX11-NEXT: v_cndmask_b32_e32 v32, 0x7fc00000, v32, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v34, v2, v18 :: v_dual_min_f32 v35, v3, v19
+; GFX11-NEXT: v_dual_min_f32 v36, v4, v20 :: v_dual_min_f32 v37, v5, v21
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17
-; GFX11-NEXT: v_cndmask_b32_e32 v33, 0x7fc00000, v33, vcc_lo
+; GFX11-NEXT: v_min_f32_e32 v54, v14, v30
+; GFX11-NEXT: v_dual_min_f32 v38, v6, v22 :: v_dual_min_f32 v39, v7, v23
+; GFX11-NEXT: v_dual_min_f32 v48, v8, v24 :: v_dual_min_f32 v49, v9, v25
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v18
-; GFX11-NEXT: v_cndmask_b32_e32 v34, 0x7fc00000, v34, vcc_lo
+; GFX11-NEXT: v_dual_min_f32 v50, v10, v26 :: v_dual_min_f32 v51, v11, v27
+; GFX11-NEXT: v_dual_min_f32 v52, v12, v28 :: v_dual_min_f32 v53, v13, v29
+; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19
-; GFX11-NEXT: v_cndmask_b32_e32 v35, 0x7fc00000, v35, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20
-; GFX11-NEXT: v_cndmask_b32_e32 v36, 0x7fc00000, v36, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21
-; GFX11-NEXT: v_cndmask_b32_e32 v37, 0x7fc00000, v37, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22
-; GFX11-NEXT: v_cndmask_b32_e32 v38, 0x7fc00000, v38, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23
-; GFX11-NEXT: v_cndmask_b32_e32 v39, 0x7fc00000, v39, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24
-; GFX11-NEXT: v_cndmask_b32_e32 v48, 0x7fc00000, v48, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25
-; GFX11-NEXT: v_cndmask_b32_e32 v49, 0x7fc00000, v49, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26
-; GFX11-NEXT: v_cndmask_b32_e32 v50, 0x7fc00000, v50, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27
-; GFX11-NEXT: v_cndmask_b32_e32 v51, 0x7fc00000, v51, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28
-; GFX11-NEXT: v_cndmask_b32_e32 v52, 0x7fc00000, v52, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29
-; GFX11-NEXT: v_cndmask_b32_e32 v53, 0x7fc00000, v53, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30
-; GFX11-NEXT: v_cndmask_b32_e32 v54, 0x7fc00000, v54, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v0, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v1, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v2, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v3, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v4, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v5, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v6, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v7, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v8, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v9, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v10, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v11, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v12, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v13, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v14, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v16, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v17, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v18, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v19, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v20, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v21, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v22, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v23, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v24, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v25, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v26, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v27, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v28, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v29, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v30, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v32
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v33
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v34
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v34, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v35
-; GFX11-NEXT: v_cndmask_b32_e32 v3, v35, v3, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v36
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v36, v4, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v37
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v37, v5, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v38
-; GFX11-NEXT: v_cndmask_b32_e32 v6, v38, v6, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, v15, v31
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v39
-; GFX11-NEXT: v_cndmask_b32_e32 v7, v39, v7, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v48
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v48, v8, vcc_lo
+; GFX11-NEXT: v_min_f32_e32 v16, v15, v31
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31
-; GFX11-NEXT: v_cndmask_b32_e32 v16, 0x7fc00000, v16, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v49
-; GFX11-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v50
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v15, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v51
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v52
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v52, v12, vcc_lo
-; GFX11-NEXT: v_cmp_class_f32_e64 vcc_lo, v31, 32
-; GFX11-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v53
-; GFX11-NEXT: v_cndmask_b32_e32 v13, v53, v13, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v54
-; GFX11-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX11-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v16
-; GFX11-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v16f32:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
index 7013c60bada5..37fe2e958e62 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
@@ -530,221 +530,86 @@ define <2 x double> @v_minimum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX7-LABEL: v_minimum_v2f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX7-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[0:1], 32
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[4:5], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v7, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v10, v11, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, v4, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX7-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX8-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[0:1], 32
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[4:5], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v7, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v10, v11, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v4, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX8-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX9-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[0:1], 32
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[4:5], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v7, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v10, v11, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX9-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_mov_b32_e32 v10, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc
+; GFX940-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX940-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v10, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[4:5], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v10, v4, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[4:5]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[6:7]
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v7, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v6, v2, s4
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v8, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v10, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v12, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v13, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[6:7], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s7, 0, v[8:9]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s8, 0, v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, v0, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, v1, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s8
+; GFX10-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[2:3], v[6:7]
-; GFX11-NEXT: v_cndmask_b32_e32 v8, v5, v1, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v7, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v6, v2, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v8, s1
-; GFX11-NEXT: v_cndmask_b32_e32 v12, v4, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v10, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v13, s2
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[6:7], 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s4, 0, v[10:11]
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v12, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v6, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s3, 0, v[8:9]
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v5, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, v0, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, v1, s3
+; GFX11-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f64:
@@ -765,182 +630,43 @@ define <2 x double> @v_minimum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX7-LABEL: v_minimum_v2f64__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 32
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[6:7], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v7, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v6, v2, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f64__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 32
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[6:7], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v7, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v6, v2, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f64__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 32
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[6:7], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v7, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v6, v2, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[12:13]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f64__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[4:5], 32
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 32
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1]
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[4:5]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[0:1]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[2:3]
+; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[6:7], 32
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v7, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v6, v2, s4
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s7, 0, v[8:9]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s8, 0, v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, v0, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, v1, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s8
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f64__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[6:7], 32
-; GFX11-NEXT: v_dual_cndmask_b32 v9, v5, v1 :: v_dual_cndmask_b32 v8, v4, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v7, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v6, v2, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s3, 0, v[8:9]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s4, 0, v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v8, v0 :: v_dual_cndmask_b32 v1, v9, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v6, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, v0, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, v1, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f64__nnan:
@@ -961,111 +687,86 @@ define <2 x double> @v_minimum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX7-LABEL: v_minimum_v2f64__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[2:3], v[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v6, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[8:9]
+; GFX7-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f64__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[2:3], v[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v6, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[8:9]
+; GFX8-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f64__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[2:3], v[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v6, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[8:9]
+; GFX9-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f64__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[4:5]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: v_mov_b32_e32 v4, 0x7ff80000
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, 0, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v2, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX940-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX940-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v2, 0, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[6:7]
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v6, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v8, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v9, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s6
+; GFX10-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f64__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[2:3], v[6:7]
-; GFX11-NEXT: v_dual_cndmask_b32 v8, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
+; GFX11-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v6, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, v8, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v9, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f64__nsz:
@@ -1086,69 +787,43 @@ define <2 x double> @v_minimum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX7-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
+; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[6:7]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v2f64__nnan_nsz:
@@ -1170,61 +845,20 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s10
+; GFX7-NEXT: v_mov_b32_e32 v4, s8
; GFX7-NEXT: v_mov_b32_e32 v1, s11
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, s[6:7], v[0:1]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], s[6:7], v[0:1]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[18:19], s[10:11], 32
-; GFX7-NEXT: v_mov_b32_e32 v0, s8
-; GFX7-NEXT: v_mov_b32_e32 v1, s9
-; GFX7-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s16, s7, s11
-; GFX7-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s15, s16, 0x7ff80000
-; GFX7-NEXT: s_and_b64 s[16:17], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s14, s6, s10
-; GFX7-NEXT: v_cmp_class_f64_e64 s[16:17], s[6:7], 32
-; GFX7-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s14, s14, 0
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[20:21], s[14:15], 0
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX7-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX7-NEXT: s_cselect_b32 s7, s7, s15
-; GFX7-NEXT: s_and_b64 s[12:13], s[18:19], exec
-; GFX7-NEXT: s_cselect_b32 s7, s11, s7
-; GFX7-NEXT: s_and_b64 s[12:13], s[20:21], exec
-; GFX7-NEXT: s_cselect_b32 s7, s7, s15
-; GFX7-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], s[4:5], v[0:1]
-; GFX7-NEXT: s_cselect_b32 s6, s6, s14
-; GFX7-NEXT: s_and_b64 s[16:17], s[18:19], exec
-; GFX7-NEXT: s_cselect_b32 s6, s10, s6
-; GFX7-NEXT: s_and_b64 s[10:11], s[20:21], exec
-; GFX7-NEXT: s_cselect_b32 s6, s6, s14
-; GFX7-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s14, s5, s9
-; GFX7-NEXT: s_and_b64 s[10:11], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s11, s14, 0x7ff80000
-; GFX7-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX7-NEXT: s_cselect_b32 s10, s4, s8
-; GFX7-NEXT: v_cmp_class_f64_e64 s[14:15], s[4:5], 32
-; GFX7-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX7-NEXT: v_cmp_class_f64_e64 s[12:13], s[8:9], 32
-; GFX7-NEXT: s_cselect_b32 s10, s10, 0
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[16:17], s[10:11], 0
-; GFX7-NEXT: s_and_b64 s[18:19], s[14:15], exec
-; GFX7-NEXT: s_cselect_b32 s5, s5, s11
-; GFX7-NEXT: s_and_b64 s[18:19], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s5, s9, s5
-; GFX7-NEXT: s_and_b64 s[18:19], s[16:17], exec
-; GFX7-NEXT: s_cselect_b32 s5, s5, s11
-; GFX7-NEXT: s_and_b64 s[14:15], s[14:15], exec
-; GFX7-NEXT: s_cselect_b32 s4, s4, s10
-; GFX7-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX7-NEXT: s_cselect_b32 s4, s8, s4
-; GFX7-NEXT: s_and_b64 s[8:9], s[16:17], exec
-; GFX7-NEXT: s_cselect_b32 s4, s4, s10
+; GFX7-NEXT: v_mov_b32_e32 v5, s9
+; GFX7-NEXT: v_min_f64 v[2:3], s[6:7], v[0:1]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
+; GFX7-NEXT: v_min_f64 v[0:1], s[4:5], v[4:5]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], s[4:5], v[4:5]
+; GFX7-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
; GFX7-NEXT: ;;#ASMSTART
-; GFX7-NEXT: ; use s[4:7]
+; GFX7-NEXT: ; use v[0:3]
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
@@ -1232,61 +866,20 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s10
+; GFX8-NEXT: v_mov_b32_e32 v4, s8
; GFX8-NEXT: v_mov_b32_e32 v1, s11
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, s[6:7], v[0:1]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], s[6:7], v[0:1]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[18:19], s[10:11], 32
-; GFX8-NEXT: v_mov_b32_e32 v0, s8
-; GFX8-NEXT: v_mov_b32_e32 v1, s9
-; GFX8-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s16, s7, s11
-; GFX8-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s15, s16, 0x7ff80000
-; GFX8-NEXT: s_and_b64 s[16:17], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s14, s6, s10
-; GFX8-NEXT: v_cmp_class_f64_e64 s[16:17], s[6:7], 32
-; GFX8-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s14, s14, 0
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[20:21], s[14:15], 0
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX8-NEXT: s_cselect_b32 s7, s7, s15
-; GFX8-NEXT: s_and_b64 s[12:13], s[18:19], exec
-; GFX8-NEXT: s_cselect_b32 s7, s11, s7
-; GFX8-NEXT: s_and_b64 s[12:13], s[20:21], exec
-; GFX8-NEXT: s_cselect_b32 s7, s7, s15
-; GFX8-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], s[4:5], v[0:1]
-; GFX8-NEXT: s_cselect_b32 s6, s6, s14
-; GFX8-NEXT: s_and_b64 s[16:17], s[18:19], exec
-; GFX8-NEXT: s_cselect_b32 s6, s10, s6
-; GFX8-NEXT: s_and_b64 s[10:11], s[20:21], exec
-; GFX8-NEXT: s_cselect_b32 s6, s6, s14
-; GFX8-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s14, s5, s9
-; GFX8-NEXT: s_and_b64 s[10:11], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s11, s14, 0x7ff80000
-; GFX8-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX8-NEXT: s_cselect_b32 s10, s4, s8
-; GFX8-NEXT: v_cmp_class_f64_e64 s[14:15], s[4:5], 32
-; GFX8-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX8-NEXT: v_cmp_class_f64_e64 s[12:13], s[8:9], 32
-; GFX8-NEXT: s_cselect_b32 s10, s10, 0
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[16:17], s[10:11], 0
-; GFX8-NEXT: s_and_b64 s[18:19], s[14:15], exec
-; GFX8-NEXT: s_cselect_b32 s5, s5, s11
-; GFX8-NEXT: s_and_b64 s[18:19], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s5, s9, s5
-; GFX8-NEXT: s_and_b64 s[18:19], s[16:17], exec
-; GFX8-NEXT: s_cselect_b32 s5, s5, s11
-; GFX8-NEXT: s_and_b64 s[14:15], s[14:15], exec
-; GFX8-NEXT: s_cselect_b32 s4, s4, s10
-; GFX8-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX8-NEXT: s_cselect_b32 s4, s8, s4
-; GFX8-NEXT: s_and_b64 s[8:9], s[16:17], exec
-; GFX8-NEXT: s_cselect_b32 s4, s4, s10
+; GFX8-NEXT: v_mov_b32_e32 v5, s9
+; GFX8-NEXT: v_min_f64 v[2:3], s[6:7], v[0:1]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
+; GFX8-NEXT: v_min_f64 v[0:1], s[4:5], v[4:5]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], s[4:5], v[4:5]
+; GFX8-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s[4:7]
+; GFX8-NEXT: ; use v[0:3]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -1294,61 +887,20 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s10
+; GFX9-NEXT: v_mov_b32_e32 v4, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s11
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, s[6:7], v[0:1]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], s[6:7], v[0:1]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[18:19], s[10:11], 32
-; GFX9-NEXT: v_mov_b32_e32 v0, s8
-; GFX9-NEXT: v_mov_b32_e32 v1, s9
-; GFX9-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s16, s7, s11
-; GFX9-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s15, s16, 0x7ff80000
-; GFX9-NEXT: s_and_b64 s[16:17], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s14, s6, s10
-; GFX9-NEXT: v_cmp_class_f64_e64 s[16:17], s[6:7], 32
-; GFX9-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s14, s14, 0
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[20:21], s[14:15], 0
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX9-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX9-NEXT: s_cselect_b32 s7, s7, s15
-; GFX9-NEXT: s_and_b64 s[12:13], s[18:19], exec
-; GFX9-NEXT: s_cselect_b32 s7, s11, s7
-; GFX9-NEXT: s_and_b64 s[12:13], s[20:21], exec
-; GFX9-NEXT: s_cselect_b32 s7, s7, s15
-; GFX9-NEXT: s_and_b64 s[12:13], s[16:17], exec
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], s[4:5], v[0:1]
-; GFX9-NEXT: s_cselect_b32 s6, s6, s14
-; GFX9-NEXT: s_and_b64 s[16:17], s[18:19], exec
-; GFX9-NEXT: s_cselect_b32 s6, s10, s6
-; GFX9-NEXT: s_and_b64 s[10:11], s[20:21], exec
-; GFX9-NEXT: s_cselect_b32 s6, s6, s14
-; GFX9-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s14, s5, s9
-; GFX9-NEXT: s_and_b64 s[10:11], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s11, s14, 0x7ff80000
-; GFX9-NEXT: s_and_b64 s[14:15], vcc, exec
-; GFX9-NEXT: s_cselect_b32 s10, s4, s8
-; GFX9-NEXT: v_cmp_class_f64_e64 s[14:15], s[4:5], 32
-; GFX9-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX9-NEXT: v_cmp_class_f64_e64 s[12:13], s[8:9], 32
-; GFX9-NEXT: s_cselect_b32 s10, s10, 0
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[16:17], s[10:11], 0
-; GFX9-NEXT: s_and_b64 s[18:19], s[14:15], exec
-; GFX9-NEXT: s_cselect_b32 s5, s5, s11
-; GFX9-NEXT: s_and_b64 s[18:19], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s5, s9, s5
-; GFX9-NEXT: s_and_b64 s[18:19], s[16:17], exec
-; GFX9-NEXT: s_cselect_b32 s5, s5, s11
-; GFX9-NEXT: s_and_b64 s[14:15], s[14:15], exec
-; GFX9-NEXT: s_cselect_b32 s4, s4, s10
-; GFX9-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX9-NEXT: s_cselect_b32 s4, s8, s4
-; GFX9-NEXT: s_and_b64 s[8:9], s[16:17], exec
-; GFX9-NEXT: s_cselect_b32 s4, s4, s10
+; GFX9-NEXT: v_mov_b32_e32 v5, s9
+; GFX9-NEXT: v_min_f64 v[2:3], s[6:7], v[0:1]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[6:7], v[0:1]
+; GFX9-NEXT: v_min_f64 v[0:1], s[4:5], v[4:5]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], s[4:5], v[4:5]
+; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[4:5]
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[4:7]
+; GFX9-NEXT: ; use v[0:3]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@@ -1356,179 +908,52 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, s[2:3], v[0:1]
-; GFX940-NEXT: s_and_b64 s[8:9], vcc, exec
-; GFX940-NEXT: v_cmp_o_f64_e64 s[8:9], s[2:3], v[0:1]
-; GFX940-NEXT: s_cselect_b32 s12, s3, s7
-; GFX940-NEXT: s_and_b64 s[10:11], s[8:9], exec
-; GFX940-NEXT: s_cselect_b32 s11, s12, 0x7ff80000
-; GFX940-NEXT: s_and_b64 s[12:13], vcc, exec
-; GFX940-NEXT: s_cselect_b32 s10, s2, s6
-; GFX940-NEXT: s_and_b64 s[8:9], s[8:9], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[12:13], s[2:3], 32
-; GFX940-NEXT: s_cselect_b32 s10, s10, 0
-; GFX940-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[14:15], s[6:7], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[8:9], s[10:11], 0
-; GFX940-NEXT: s_cselect_b32 s3, s3, s11
-; GFX940-NEXT: s_and_b64 s[16:17], s[14:15], exec
-; GFX940-NEXT: s_cselect_b32 s3, s7, s3
-; GFX940-NEXT: s_and_b64 s[16:17], s[8:9], exec
-; GFX940-NEXT: s_cselect_b32 s7, s3, s11
-; GFX940-NEXT: s_and_b64 s[12:13], s[12:13], exec
-; GFX940-NEXT: s_cselect_b32 s11, s2, s10
-; GFX940-NEXT: s_and_b64 s[2:3], s[14:15], exec
+; GFX940-NEXT: v_min_f64 v[2:3], s[2:3], v[0:1]
+; GFX940-NEXT: v_mov_b32_e32 v6, 0x7ff80000
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1]
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
-; GFX940-NEXT: s_cselect_b32 s6, s6, s11
-; GFX940-NEXT: s_and_b64 s[2:3], s[8:9], exec
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; GFX940-NEXT: s_cselect_b32 s6, s6, s10
-; GFX940-NEXT: s_and_b64 s[2:3], vcc, exec
-; GFX940-NEXT: v_cmp_o_f64_e64 s[2:3], s[0:1], v[0:1]
-; GFX940-NEXT: s_cselect_b32 s10, s1, s5
-; GFX940-NEXT: s_and_b64 s[8:9], s[2:3], exec
-; GFX940-NEXT: s_cselect_b32 s9, s10, 0x7ff80000
-; GFX940-NEXT: s_and_b64 s[10:11], vcc, exec
-; GFX940-NEXT: s_cselect_b32 s8, s0, s4
-; GFX940-NEXT: s_and_b64 s[2:3], s[2:3], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[10:11], s[0:1], 32
-; GFX940-NEXT: s_cselect_b32 s8, s8, 0
-; GFX940-NEXT: s_and_b64 s[12:13], s[10:11], exec
-; GFX940-NEXT: v_cmp_class_f64_e64 s[12:13], s[4:5], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], s[8:9], 0
-; GFX940-NEXT: s_cselect_b32 s1, s1, s9
-; GFX940-NEXT: s_and_b64 s[14:15], s[12:13], exec
-; GFX940-NEXT: s_cselect_b32 s1, s5, s1
-; GFX940-NEXT: s_and_b64 s[14:15], s[2:3], exec
-; GFX940-NEXT: s_cselect_b32 s5, s1, s9
-; GFX940-NEXT: s_and_b64 s[10:11], s[10:11], exec
-; GFX940-NEXT: s_cselect_b32 s9, s0, s8
-; GFX940-NEXT: s_and_b64 s[0:1], s[12:13], exec
-; GFX940-NEXT: s_cselect_b32 s4, s4, s9
-; GFX940-NEXT: s_and_b64 s[0:1], s[2:3], exec
-; GFX940-NEXT: s_cselect_b32 s4, s4, s8
+; GFX940-NEXT: v_min_f64 v[4:5], s[0:1], v[0:1]
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX940-NEXT: ;;#ASMSTART
-; GFX940-NEXT: ; use s[4:7]
+; GFX940-NEXT: ; use v[0:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_minimum_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s12, s[6:7], s[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s14, s[6:7], s[10:11]
-; GFX10-NEXT: v_cmp_class_f64_e64 s15, s[6:7], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s16, s[10:11], 32
-; GFX10-NEXT: v_cmp_o_f64_e64 s18, s[4:5], s[8:9]
-; GFX10-NEXT: v_cmp_class_f64_e64 s19, s[4:5], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s20, s[8:9], 32
-; GFX10-NEXT: s_and_b32 s13, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s13, s7, s11
-; GFX10-NEXT: s_and_b32 s17, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s13, s13, 0x7ff80000
-; GFX10-NEXT: s_and_b32 s12, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s12, s6, s10
-; GFX10-NEXT: s_and_b32 s14, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s12, s12, 0
-; GFX10-NEXT: v_cmp_lt_f64_e64 s17, s[4:5], s[8:9]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, s[12:13], 0
-; GFX10-NEXT: s_and_b32 s21, s15, exec_lo
-; GFX10-NEXT: s_cselect_b32 s7, s7, s13
-; GFX10-NEXT: s_and_b32 s21, s16, exec_lo
-; GFX10-NEXT: s_cselect_b32 s7, s11, s7
-; GFX10-NEXT: s_and_b32 s11, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s7, s7, s13
-; GFX10-NEXT: s_and_b32 s11, s15, exec_lo
-; GFX10-NEXT: s_cselect_b32 s6, s6, s12
-; GFX10-NEXT: s_and_b32 s11, s16, exec_lo
-; GFX10-NEXT: s_cselect_b32 s6, s10, s6
-; GFX10-NEXT: s_and_b32 s10, s14, exec_lo
-; GFX10-NEXT: s_cselect_b32 s6, s6, s12
-; GFX10-NEXT: s_and_b32 s10, s17, exec_lo
-; GFX10-NEXT: s_cselect_b32 s10, s5, s9
-; GFX10-NEXT: s_and_b32 s11, s18, exec_lo
-; GFX10-NEXT: s_cselect_b32 s11, s10, 0x7ff80000
-; GFX10-NEXT: s_and_b32 s10, s17, exec_lo
-; GFX10-NEXT: s_cselect_b32 s10, s4, s8
-; GFX10-NEXT: s_and_b32 s12, s18, exec_lo
-; GFX10-NEXT: s_cselect_b32 s10, s10, 0
-; GFX10-NEXT: s_and_b32 s13, s19, exec_lo
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, s[10:11], 0
-; GFX10-NEXT: s_cselect_b32 s5, s5, s11
-; GFX10-NEXT: s_and_b32 s13, s20, exec_lo
-; GFX10-NEXT: s_cselect_b32 s5, s9, s5
-; GFX10-NEXT: s_and_b32 s9, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s5, s5, s11
-; GFX10-NEXT: s_and_b32 s9, s19, exec_lo
-; GFX10-NEXT: s_cselect_b32 s4, s4, s10
-; GFX10-NEXT: s_and_b32 s9, s20, exec_lo
-; GFX10-NEXT: s_cselect_b32 s4, s8, s4
-; GFX10-NEXT: s_and_b32 s8, s12, exec_lo
-; GFX10-NEXT: s_cselect_b32 s4, s4, s10
+; GFX10-NEXT: v_min_f64 v[0:1], s[6:7], s[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, s[6:7], s[10:11]
+; GFX10-NEXT: v_min_f64 v[4:5], s[4:5], s[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, s[4:5], s[8:9]
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v1, 0x7ff80000, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v0, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, s4
; GFX10-NEXT: ;;#ASMSTART
-; GFX10-NEXT: ; use s[4:7]
+; GFX10-NEXT: ; use v[0:3]
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_minimum_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s8, s[2:3], s[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s10, s[2:3], s[6:7]
-; GFX11-NEXT: v_cmp_class_f64_e64 s11, s[2:3], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s12, s[6:7], 32
-; GFX11-NEXT: v_cmp_o_f64_e64 s14, s[0:1], s[4:5]
-; GFX11-NEXT: v_cmp_class_f64_e64 s15, s[0:1], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s16, s[4:5], 32
-; GFX11-NEXT: s_and_b32 s9, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s9, s3, s7
-; GFX11-NEXT: s_and_b32 s13, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s9, s9, 0x7ff80000
-; GFX11-NEXT: s_and_b32 s8, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s8, s2, s6
-; GFX11-NEXT: s_and_b32 s10, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s8, s8, 0
-; GFX11-NEXT: v_cmp_lt_f64_e64 s13, s[0:1], s[4:5]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s10, s[8:9], 0
-; GFX11-NEXT: s_and_b32 s17, s11, exec_lo
-; GFX11-NEXT: s_cselect_b32 s3, s3, s9
-; GFX11-NEXT: s_and_b32 s17, s12, exec_lo
-; GFX11-NEXT: s_cselect_b32 s3, s7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: s_and_b32 s7, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s3, s3, s9
-; GFX11-NEXT: s_and_b32 s7, s11, exec_lo
-; GFX11-NEXT: s_cselect_b32 s2, s2, s8
-; GFX11-NEXT: s_and_b32 s7, s12, exec_lo
-; GFX11-NEXT: s_cselect_b32 s2, s6, s2
-; GFX11-NEXT: s_and_b32 s6, s10, exec_lo
-; GFX11-NEXT: s_cselect_b32 s2, s2, s8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: s_and_b32 s6, s13, exec_lo
-; GFX11-NEXT: s_cselect_b32 s6, s1, s5
-; GFX11-NEXT: s_and_b32 s7, s14, exec_lo
-; GFX11-NEXT: s_cselect_b32 s7, s6, 0x7ff80000
-; GFX11-NEXT: s_and_b32 s6, s13, exec_lo
-; GFX11-NEXT: s_cselect_b32 s6, s0, s4
-; GFX11-NEXT: s_and_b32 s8, s14, exec_lo
-; GFX11-NEXT: s_cselect_b32 s6, s6, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: s_and_b32 s9, s15, exec_lo
-; GFX11-NEXT: v_cmp_eq_f64_e64 s8, s[6:7], 0
-; GFX11-NEXT: s_cselect_b32 s1, s1, s7
-; GFX11-NEXT: s_and_b32 s9, s16, exec_lo
-; GFX11-NEXT: s_cselect_b32 s1, s5, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: s_and_b32 s5, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s1, s1, s7
-; GFX11-NEXT: s_and_b32 s5, s15, exec_lo
-; GFX11-NEXT: s_cselect_b32 s0, s0, s6
-; GFX11-NEXT: s_and_b32 s5, s16, exec_lo
-; GFX11-NEXT: s_cselect_b32 s0, s4, s0
-; GFX11-NEXT: s_and_b32 s4, s8, exec_lo
-; GFX11-NEXT: s_cselect_b32 s0, s0, s6
+; GFX11-NEXT: v_min_f64 v[0:1], s[2:3], s[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[6:7]
+; GFX11-NEXT: v_min_f64 v[4:5], s[0:1], s[4:5]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v1, 0x7ff80000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v0, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, s0
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s[0:3]
+; GFX11-NEXT: ; use v[0:3]
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
@@ -1554,306 +979,110 @@ define <3 x double> @v_minimum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX7-LABEL: v_minimum_v3f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX7-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[2:3], v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v9, v3, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[8:9], 32
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[10:11]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v11, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v10, v4, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, 0, v8, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX7-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX7-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX8-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[2:3], v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v9, v3, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[8:9], 32
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[10:11]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v11, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v10, v4, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, v8, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX8-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX8-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[10:11], v[2:3], v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v9, v3, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[8:9], 32
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[10:11]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v11, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, v8, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX9-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX9-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_mov_b32_e32 v14, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v14, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v11, v5, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[2:3]
+; GFX940-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX940-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v14, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v10, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX940-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v11, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v17, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v10, v4, s5
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v12, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v14, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, v17, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, v18, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, v19, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[8:9], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[6:7], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[10:11], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[12:13]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[14:15]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s11, 0, v[16:17]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v8, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v10, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v9, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v11, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s11
+; GFX10-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
+; GFX10-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[4:5], v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v12, v7, v1 :: v_dual_cndmask_b32 v17, v6, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v11, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v10, v4, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v14, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v12, 0, v17, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v14, 0, v18, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v16, 0, v19, s4
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[8:9], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[10:11], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s5, 0, v[12:13]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s6, 0, v[14:15]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v12, v0 :: v_dual_cndmask_b32 v1, v13, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v8, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v10, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v7, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v9, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v11, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, v0, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, v1, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s7
+; GFX11-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
+; GFX11-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f64:
@@ -1875,247 +1104,49 @@ define <3 x double> @v_minimum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX7-LABEL: v_minimum_v3f64__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v9, v3, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v11, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v10, v4, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[6:7]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f64__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v9, v3, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v11, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v10, v4, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[6:7]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f64__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[6:7], 32
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[8:9], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v9, v3, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v11, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v10, v4, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[6:7]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[12:13]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f64__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[6:7], 32
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[12:13]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 32
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[0:1]
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[0:1]
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v11, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v10, v4, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[6:7]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[2:3]
+; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[8:9], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[6:7], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[10:11], 32
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v11, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v10, v4, s5
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[12:13]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[14:15]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s11, 0, v[16:17]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v8, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v10, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v9, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v11, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v14, v2, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v16, v4, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v15, v3, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v17, v5, s11
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f64__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[8:9], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[10:11], 32
-; GFX11-NEXT: v_dual_cndmask_b32 v13, v7, v1 :: v_dual_cndmask_b32 v12, v6, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v17, v11, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v10, v4, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s5, 0, v[12:13]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s6, 0, v[14:15]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v12, v0 :: v_dual_cndmask_b32 v1, v13, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v8, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v10, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v7, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v9, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v11, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, v0, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v14, v2, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v16, v4, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, v1, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v15, v3, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v17, v5, s7
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f64__nnan:
@@ -2137,144 +1168,110 @@ define <3 x double> @v_minimum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX7-LABEL: v_minimum_v3f64__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[8:9], v[4:5], v[10:11]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v10, v4, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX7-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[10:11]
+; GFX7-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX7-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f64__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[8:9], v[4:5], v[10:11]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v10, v4, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX8-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[10:11]
+; GFX8-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX8-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f64__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[8:9], v[4:5], v[10:11]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[10:11], v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v10, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v2, vcc
-; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[10:11]
+; GFX9-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX9-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f64__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[6:7]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT: v_mov_b32_e32 v6, 0x7ff80000
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, 0, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v8, v2, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, 0, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v3, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v10, v4, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc
+; GFX940-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX940-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v6, v5, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v12, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX940-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v12, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v10, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v12, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v8, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s8
+; GFX10-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
+; GFX10-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f64__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[4:5], v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v12, v6, v0 :: v_dual_cndmask_b32 v1, v7, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v10, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v8, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s4
+; GFX11-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
+; GFX11-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f64__nsz:
@@ -2296,88 +1293,49 @@ define <3 x double> @v_minimum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX7-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[8:9]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc
+; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, v5, s5
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v6, v0 :: v_dual_cndmask_b32 v1, v7, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, v5, s1
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v3f64__nnan_nsz:
@@ -2399,404 +1357,135 @@ define <4 x double> @v_minimum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX7-LABEL: v_minimum_v4f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX7-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[2:3], v[10:11]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[10:11]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[4:5], v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v11, v3, s[6:7]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v10, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v18, v19, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[8:9]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v13, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v18, v10, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v18, v10, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[12:13]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[12:13], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[14:15], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[10:11]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX7-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX7-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX7-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX8-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[2:3], v[10:11]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[10:11]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[4:5], v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v11, v3, s[6:7]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v10, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v18, v19, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[8:9]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v13, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v18, v10, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v18, v10, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[12:13]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[12:13], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[14:15], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[10:11]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX8-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX8-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX8-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX9-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], v[4:5], v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v11, v3, s[6:7]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[10:11], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v10, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v18, v19, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[8:9]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v13, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v18, v10, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v15, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v18, v10, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[12:13]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[12:13], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[14:15], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[10:11]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX9-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX9-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX9-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX9-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_mov_b32_e32 v18, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v18, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v11, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v18, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v13, v5, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[2:3]
+; GFX940-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX940-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v18, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[12:13], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v15, v7, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v18, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[14:15], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX940-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX940-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s6, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s8, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_o_f64_e64 s10, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_class_f64_e64 s11, v[14:15], 32
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v19, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v21, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v20, v13, v5, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v15, v7, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, v19, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v19, 0x7ff80000, v18, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v12, v4, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, v21, s7
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v24, v14, v6, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[4:5], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[6:7], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v21, 0x7ff80000, v20, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, v23, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v23, 0x7ff80000, v22, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v22, 0, v24, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s9, v[10:11], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[12:13], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s6, 0, v[16:17]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, 0, v[18:19]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s13, 0, v[20:21]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, 0, v[22:23]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v20, v4, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v21, v5, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v8, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v14, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v9, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v13, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v15, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v0, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v1, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v20, v4, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v21, v5, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s14
+; GFX10-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
+; GFX10-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
+; GFX10-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s2, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s3, v[6:7], v[14:15]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[14:15]
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v9, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v11, v3, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v13, v5, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v22, v15, v7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v16, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v19, 0x7ff80000, v18, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v21, 0x7ff80000, v20, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v10, v2, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v12, v4, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v24, v14, v6, s3
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v23, 0x7ff80000, v22, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v18, 0, v18, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v20, 0, v20, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v22, 0, v24, s6
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[12:13], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[14:15], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[10:11], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s8, 0, v[18:19]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s9, 0, v[20:21]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s10, 0, v[22:23]
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v16, 0, v16, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v14, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v13, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v15, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s10
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, v0, s7
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v10, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v11, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, v1, s7
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s8
+; GFX11-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
+; GFX11-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
+; GFX11-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f64:
@@ -2819,320 +1508,55 @@ define <4 x double> @v_minimum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX7-LABEL: v_minimum_v4f64__nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 32
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 32
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v19, v11, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v10, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v13, v5, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[4:5], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[14:15], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v15, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f64__nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 32
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 32
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v19, v11, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v10, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v13, v5, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[4:5], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[10:11], v[14:15], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v15, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f64__nnan:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], 32
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[8:9], 32
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[10:11], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[4:5]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v19, v11, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v10, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v13, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v12, v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[4:5], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[10:11], v[14:15], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v15, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v14, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v10, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v11, v7, s[12:13]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f64__nnan:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[8:9], 32
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v11, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[10:11], 32
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v17, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[0:1]
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1]
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v13, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v4, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[12:13], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[0:1]
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v15, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v14, v6, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[14:15], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[8:9]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[2:3]
+; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX940-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[10:11], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[8:9], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[12:13], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s11, v[14:15], 32
-; GFX10-NEXT: v_cndmask_b32_e32 v17, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v21, v13, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v15, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v20, v12, v4, s5
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[4:5], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v14, v6, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[6:7], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[0:1], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[2:3], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[16:17]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, 0, v[18:19]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s13, 0, v[20:21]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, 0, v[22:23]
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v20, v4, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v5, v21, v5, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v0, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v1, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v8, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v14, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v9, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v13, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v15, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v20, v4, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v21, v5, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v22, v6, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v23, v7, s14
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f64__nnan:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[6:7], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[14:15], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[10:11], 32
-; GFX11-NEXT: v_dual_cndmask_b32 v17, v9, v1 :: v_dual_cndmask_b32 v16, v8, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v21, v13, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v12, v4, s1
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[2:3], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[4:5], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v23, v15, v7, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v22, v14, v6, s2
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[12:13], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[16:17]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s8, 0, v[18:19]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s9, 0, v[20:21]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s10, 0, v[22:23]
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v14, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v15, s4
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v16, v0 :: v_dual_cndmask_b32 v1, v17, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v10, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v12, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v11, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v13, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, v0, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v20, v4, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v22, v6, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, v1, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v19, v3, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v21, v5, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v23, v7, s10
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f64__nnan:
@@ -3155,180 +1579,135 @@ define <4 x double> @v_minimum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX7-LABEL: v_minimum_v4f64__nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[10:11], v[6:7], v[14:15]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX7-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v14, v6, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v12, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[12:13]
+; GFX7-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX7-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX7-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f64__nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[10:11], v[6:7], v[14:15]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v14, v6, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v12, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[12:13]
+; GFX8-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX8-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX8-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f64__nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[10:11], v[6:7], v[14:15]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v14, v6, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v12, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[12:13]
+; GFX9-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX9-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX9-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX9-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX9-NEXT: v_mov_b32_e32 v7, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f64__nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[8:9]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, 0, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v10, v2, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[10:11]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, 0, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v12, v4, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[12:13]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, 0, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v14, v6, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[14:15]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v15, v7, vcc
+; GFX940-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX940-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v6, 0, v9, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX940-NEXT: v_mov_b32_e32 v16, 0x7ff80000
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX940-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX940-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s7, v[6:7], v[14:15]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_o_f64_e64 s10, v[6:7], v[14:15]
-; GFX10-NEXT: v_cndmask_b32_e32 v16, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v12, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v14, v6, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v13, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, v7, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v16, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v8, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v10, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v7, s10
+; GFX10-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
+; GFX10-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
+; GFX10-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f64__nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s2, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s3, v[6:7], v[14:15]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[14:15]
-; GFX11-NEXT: v_dual_cndmask_b32 v16, v8, v0 :: v_dual_cndmask_b32 v1, v9, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v10, v2, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v12, v4, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v14, v6, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v13, v5, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v15, v7, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, v16, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v8, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v10, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v12, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v1, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v3, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v5, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v7, s6
+; GFX11-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
+; GFX11-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
+; GFX11-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f64__nsz:
@@ -3351,108 +1730,55 @@ define <4 x double> @v_minimum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX7-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[8:9]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[8:9]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[12:13]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[8:9], v[6:7], v[14:15]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v15, v7, s[8:9]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[8:9]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[10:11]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[12:13]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[14:15]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v15, v7, vcc
+; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX940-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX940-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX940-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[2:3], v[10:11]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[4:5], v[12:13]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v12, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v14, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v13, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, v7, s6
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX10-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[2:3], v[10:11]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[4:5], v[12:13]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v8, v0 :: v_dual_cndmask_b32 v1, v9, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v12, v4, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v14, v6, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v13, v5, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v15, v7, s2
+; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v4f64__nnan_nsz:
@@ -3475,782 +1801,244 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX7-LABEL: v_minimum_v8f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[16:17]
-; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[8:9], v[24:25]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[10:11], v[12:13], v[28:29]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[12:13], v[28:29]
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v17, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v32, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v16, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v33, 0, v31, s[4:5]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e64 v18, v21, v5, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v20, v4, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v18, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[6:7]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[4:5]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[8:9], v[24:25]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v23, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[22:23], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v25, v9, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v24, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[18:19]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[24:25], 32
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[26:27]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[26:27], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v27, v11, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[8:9]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
+; GFX7-NEXT: v_min_f64 v[32:33], v[2:3], v[18:19]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX7-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX7-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX7-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX7-NEXT: v_min_f64 v[16:17], v[8:9], v[24:25]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX7-NEXT: v_min_f64 v[22:23], v[10:11], v[26:27]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX7-NEXT: v_min_f64 v[24:25], v[12:13], v[28:29]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX7-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[14:15], v[30:31]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v29, v13, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v28, v12, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v18, v10, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v19, v11, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[12:13]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[28:29], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v31, v15, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v30, v14, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[30:31], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v18, v14, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v19, v15, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v14, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v15, v19, v15, s[12:13]
+; GFX7-NEXT: v_min_f64 v[18:19], v[14:15], v[30:31]
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v8f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[16:17]
-; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[8:9], v[24:25]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[10:11], v[12:13], v[28:29]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[12:13], v[28:29]
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v17, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v32, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v16, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v33, 0, v31, s[4:5]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e64 v18, v21, v5, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v20, v4, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v18, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[6:7]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[4:5]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[8:9], v[24:25]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v23, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[22:23], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v25, v9, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v24, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[18:19]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[24:25], 32
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[26:27]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[26:27], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v27, v11, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[8:9]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
+; GFX8-NEXT: v_min_f64 v[32:33], v[2:3], v[18:19]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX8-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX8-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX8-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX8-NEXT: v_min_f64 v[16:17], v[8:9], v[24:25]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX8-NEXT: v_min_f64 v[22:23], v[10:11], v[26:27]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX8-NEXT: v_min_f64 v[24:25], v[12:13], v[28:29]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX8-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[14:15], v[30:31]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v29, v13, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v28, v12, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v18, v10, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v19, v11, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[12:13]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[28:29], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v31, v15, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v30, v14, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[30:31], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v18, v14, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v19, v15, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v14, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v15, v19, v15, s[12:13]
+; GFX8-NEXT: v_min_f64 v[18:19], v[14:15], v[30:31]
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v8f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[16:17]
-; GFX9-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[8:9], v[24:25]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[10:11], v[12:13], v[28:29]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[12:13], v[12:13], v[28:29]
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v17, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v32, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v16, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v33, 0, v31, s[4:5]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v33, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[4:5]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[4:5], v[20:21]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v21, v5, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v20, v4, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v18, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[6:7]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[4:5]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[8:9], v[24:25]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v23, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[22:23], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v25, v9, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v24, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[18:19]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[8:9], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[24:25], 32
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[26:27]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v18, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v19, v7, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[16:17]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[26:27], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v27, v11, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v16, s[8:9]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[8:9], 0, v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[4:5]
+; GFX9-NEXT: v_min_f64 v[32:33], v[2:3], v[18:19]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX9-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[4:5], v[20:21]
+; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[0:1], v[16:17]
+; GFX9-NEXT: v_mov_b32_e32 v34, 0x7ff80000
+; GFX9-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[6:7], v[22:23]
+; GFX9-NEXT: v_min_f64 v[16:17], v[8:9], v[24:25]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX9-NEXT: v_min_f64 v[22:23], v[10:11], v[26:27]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX9-NEXT: v_min_f64 v[24:25], v[12:13], v[28:29]
+; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v34, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v32, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v33, v34, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v18, 0, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v19, v34, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v20, 0, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v21, v34, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v8, v16, 0, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v17, v34, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v22, 0, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v11, v23, v34, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v12, v24, 0, s[14:15]
+; GFX9-NEXT: v_cndmask_b32_e64 v13, v25, v34, s[14:15]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[14:15], v[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v29, v13, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v28, v12, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v18, v10, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v19, v11, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v19, v11, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[12:13], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[12:13]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[28:29], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v31, v15, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v32, v18, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v30, v14, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[30:31], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[16:17]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[18:19]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v18, v14, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v19, v15, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v18, v14, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v19, v15, s[12:13]
+; GFX9-NEXT: v_min_f64 v[18:19], v[14:15], v[30:31]
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v18, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v15, v19, v34, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v8f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[16:17]
-; GFX940-NEXT: v_mov_b32_e32 v32, 0x7ff80000
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v17, v1, vcc
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v35, v32, v33, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v33, v16, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v34, 0, v33, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[16:17], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v34, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v35, v1, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[18:19]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v19, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[18:19]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v34, v0, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v35, v1, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v18, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[18:19], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[20:21]
-; GFX940-NEXT: v_cndmask_b32_e64 v2, v16, v2, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v21, v5, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[20:21]
-; GFX940-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v20, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[20:21], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[22:23]
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v16, v4, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v21, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v23, v7, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[22:23]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v17, v5, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v22, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[22:23], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v22, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[8:9], v[24:25]
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v16, v6, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v23, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v25, v9, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[8:9], v[24:25]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v17, v7, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v24, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[24:25], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v8, v24, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[26:27]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v16, v8, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v9, v25, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v27, v11, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[10:11], v[26:27]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v17, v9, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v26, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[26:27], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v10, v26, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[12:13], v[28:29]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v16, v10, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v11, v27, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v29, v13, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[12:13], v[28:29]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v17, v11, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v28, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[28:29], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v12, v28, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc
+; GFX940-NEXT: v_mov_b32_e32 v54, 0x7ff80000
+; GFX940-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX940-NEXT: v_min_f64 v[34:35], v[2:3], v[18:19]
+; GFX940-NEXT: v_min_f64 v[36:37], v[4:5], v[20:21]
+; GFX940-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX940-NEXT: v_min_f64 v[38:39], v[6:7], v[22:23]
+; GFX940-NEXT: v_min_f64 v[48:49], v[8:9], v[24:25]
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
+; GFX940-NEXT: v_min_f64 v[50:51], v[10:11], v[26:27]
+; GFX940-NEXT: v_min_f64 v[52:53], v[12:13], v[28:29]
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[14:15], v[30:31]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v16, v12, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v13, v29, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v31, v15, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[14:15], v[30:31]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v17, v13, s[2:3]
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v32, v16, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[30:31], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[16:17]
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v15, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v14, v30, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v15, v31, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v16, v14, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v17, v15, s[2:3]
+; GFX940-NEXT: v_min_f64 v[16:17], v[14:15], v[30:31]
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v8f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[0:1], v[16:17]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s9, v[6:7], v[22:23]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s10, v[8:9], v[24:25]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s11, v[10:11], v[26:27]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s12, v[12:13], v[28:29]
-; GFX10-NEXT: v_cmp_o_f64_e64 s13, v[6:7], v[22:23]
-; GFX10-NEXT: v_cmp_o_f64_e64 s14, v[8:9], v[24:25]
-; GFX10-NEXT: v_cmp_o_f64_e64 s15, v[10:11], v[26:27]
-; GFX10-NEXT: v_cmp_o_f64_e64 s16, v[12:13], v[28:29]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[2:3], v[18:19]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[18:19]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s7, v[4:5], v[20:21]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[20:21]
-; GFX10-NEXT: v_cmp_class_f64_e64 s17, v[26:27], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s18, v[28:29], 32
-; GFX10-NEXT: v_cndmask_b32_e32 v32, v17, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v38, v23, v7, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v48, v25, v9, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v50, v27, v11, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v33, 0x7ff80000, v32, s4
-; GFX10-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v52, v29, v13, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[0:1], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v39, 0x7ff80000, v38, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v49, 0x7ff80000, v48, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v32, 0, v32, s4
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v51, 0x7ff80000, v50, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v53, 0x7ff80000, v52, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v38, v22, v6, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v48, v24, v8, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v50, v26, v10, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v52, v28, v12, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s11, v[16:17], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[18:19], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v34, v19, v3, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v38, 0, v38, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v36, v21, v5, s7
-; GFX10-NEXT: v_cmp_class_f64_e64 s9, v[12:13], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v48, 0, v48, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v35, 0x7ff80000, v34, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v34, v18, v2, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v37, 0x7ff80000, v36, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v36, v20, v4, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v32, v0, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[4:5], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v34, 0, v34, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[6:7], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v36, 0, v36, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[8:9], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[10:11], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v34, v2, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v50, 0, v50, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v52, 0, v52, s16
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[20:21], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v16, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v18, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s15, v[22:23], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s16, v[24:25], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s19, 0, v[32:33]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s20, 0, v[34:35]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s21, 0, v[36:37]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s22, 0, v[48:49]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s23, 0, v[50:51]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s24, 0, v[52:53]
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v33, v1, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v52, v12, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v36, v4, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v35, v3, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v38, v6, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v37, v5, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v48, v8, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v50, v10, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v39, v7, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v49, v9, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v51, v11, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v53, v13, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v20, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v26, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v22, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v24, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v28, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v17, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v19, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v21, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v23, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v25, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v27, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v29, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v32, v0, s19
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v34, v2, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v36, v4, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v48, v8, s22
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v50, v10, s23
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v52, v12, s24
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v33, v1, s19
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v35, v3, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v37, v5, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v49, v9, s22
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v51, v11, s23
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v53, v13, s24
+; GFX10-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX10-NEXT: v_min_f64 v[16:17], v[2:3], v[18:19]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[18:19]
+; GFX10-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[20:21]
+; GFX10-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[22:23]
+; GFX10-NEXT: v_min_f64 v[22:23], v[8:9], v[24:25]
+; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[8:9], v[24:25]
+; GFX10-NEXT: v_min_f64 v[24:25], v[10:11], v[26:27]
+; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[10:11], v[26:27]
+; GFX10-NEXT: v_min_f64 v[26:27], v[12:13], v[28:29]
+; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[28:29]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v16, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v18, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v20, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v8, v22, 0, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v10, v24, 0, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v12, v26, 0, s9
+; GFX10-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s9
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s10, v[14:15], v[30:31]
-; GFX10-NEXT: v_cmp_o_f64_e64 s13, v[14:15], v[30:31]
-; GFX10-NEXT: v_cmp_class_f64_e64 s25, v[30:31], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v31, v15, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v30, v14, s10
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[38:39]
-; GFX10-NEXT: v_cndmask_b32_e64 v55, 0x7ff80000, v16, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v54, 0, v18, s13
-; GFX10-NEXT: v_cmp_class_f64_e64 s13, v[14:15], 32
-; GFX10-NEXT: v_cmp_eq_f64_e32 vcc_lo, 0, v[54:55]
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v38, v6, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v39, v7, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v54, v14, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v55, v15, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v30, s25
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v31, s25
-; GFX10-NEXT: v_cndmask_b32_e32 v14, v54, v14, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v15, v55, v15, vcc_lo
+; GFX10-NEXT: v_min_f64 v[28:29], v[14:15], v[30:31]
+; GFX10-NEXT: v_cmp_u_f64_e64 s10, v[14:15], v[30:31]
+; GFX10-NEXT: v_cndmask_b32_e64 v14, v28, 0, s10
+; GFX10-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s10
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v8f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: v_cmp_lt_f64_e64 s4, v[6:7], v[22:23]
-; GFX11-NEXT: v_cmp_o_f64_e64 s9, v[6:7], v[22:23]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s1, v[2:3], v[18:19]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s6, v[10:11], v[26:27]
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[2:3], v[18:19]
-; GFX11-NEXT: v_cmp_o_f64_e64 s11, v[10:11], v[26:27]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[0:1], v[16:17]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s3, v[4:5], v[20:21]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s5, v[8:9], v[24:25]
-; GFX11-NEXT: v_cmp_lt_f64_e64 s7, v[12:13], v[28:29]
-; GFX11-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[20:21]
-; GFX11-NEXT: v_cmp_o_f64_e64 s10, v[8:9], v[24:25]
-; GFX11-NEXT: v_cmp_o_f64_e64 s12, v[12:13], v[28:29]
-; GFX11-NEXT: v_cmp_class_f64_e64 s13, v[18:19], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s15, v[20:21], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v38, v23, v7, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v34, v19, v3, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v50, v27, v11, s6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v39, 0x7ff80000, v38, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v38, v22, v6, s4
-; GFX11-NEXT: v_cmp_class_f64_e64 s4, v[6:7], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v35, 0x7ff80000, v34, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v51, 0x7ff80000, v50, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v34, v18, v2, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v50, v26, v10, s6
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[0:1], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v36, v21, v5, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v48, v25, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v52, v29, v13, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v50, 0, v50, s11
-; GFX11-NEXT: v_cmp_class_f64_e64 s11, v[16:17], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v37, 0x7ff80000, v36, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v49, 0x7ff80000, v48, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v53, 0x7ff80000, v52, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v36, v20, v4, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v48, v24, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v52, v28, v12, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v34, 0, v34, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v38, 0, v38, s9
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[2:3], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[4:5], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s5, v[8:9], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s7, v[10:11], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s9, v[12:13], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v36, 0, v36, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v48, 0, v48, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v52, 0, v52, s12
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[24:25], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s8, v[26:27], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s10, v[28:29], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s14, 0, v[34:35]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s16, 0, v[36:37]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s17, 0, v[38:39]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s18, 0, v[48:49]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s20, 0, v[50:51]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s21, 0, v[52:53]
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v39, v7, s4
-; GFX11-NEXT: v_cndmask_b32_e32 v32, v17, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v38, v6, s4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v33, 0x7ff80000, v32, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v32, v16, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, v1, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v32, 0, v32, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v17, s11
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, v0, s1
-; GFX11-NEXT: v_cmp_eq_f64_e64 s12, 0, v[32:33]
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v34, v2, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v36, v4, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v48, v8, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v16, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v50, v10, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v52, v12, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v35, v3, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v37, v5, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v49, v9, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v51, v11, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v53, v13, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v18, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v20, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v24, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v26, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, v28, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v19, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v21, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v25, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v27, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v29, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v34, v2, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v36, v4, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v48, v8, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v50, v10, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v52, v12, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v35, v3, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v37, v5, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v49, v9, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v51, v11, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v53, v13, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, v0, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, v1, s12
+; GFX11-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX11-NEXT: v_min_f64 v[16:17], v[2:3], v[18:19]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[18:19]
+; GFX11-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[20:21]
+; GFX11-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[22:23]
+; GFX11-NEXT: v_min_f64 v[22:23], v[8:9], v[24:25]
+; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[24:25]
+; GFX11-NEXT: v_min_f64 v[24:25], v[10:11], v[26:27]
+; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[26:27]
+; GFX11-NEXT: v_min_f64 v[26:27], v[12:13], v[28:29]
+; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[28:29]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v16, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v18, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v20, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v8, v22, 0, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v10, v24, 0, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v12, v26, 0, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s5
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[14:15], v[30:31]
-; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[14:15], v[30:31]
-; GFX11-NEXT: v_cmp_class_f64_e64 s19, v[30:31], 32
-; GFX11-NEXT: v_cndmask_b32_e32 v54, v31, v15, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e32 v16, v30, v14, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[22:23], 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v55, 0x7ff80000, v54, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v54, 0, v16, s0
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[14:15], 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cmp_eq_f64_e64 s22, 0, v[54:55]
-; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v23 :: v_dual_cndmask_b32 v6, v6, v22
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v54, v14, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v55, v15, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v38, v6, s17
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v39, v7, s17
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v30, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, v31, s19
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v54, v14, s22
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v55, v15, s22
+; GFX11-NEXT: v_min_f64 v[28:29], v[14:15], v[30:31]
+; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[30:31]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e64 v14, v28, 0, s6
+; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v8f64:
@@ -4279,1799 +2067,798 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-LABEL: v_minimum_v16f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16
-; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
-; GFX7-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:24
-; GFX7-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:32
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:28
-; GFX7-NEXT: v_mov_b32_e32 v39, 0x7ff80000
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[31:32]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[31:32]
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v48, v32, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v48, v31, v0, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v32, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v34, v3, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v33, v2, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v32, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[6:7]
-; GFX7-NEXT: s_waitcnt vmcnt(2)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v33, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v34, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[35:36], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v36, v5, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v33, v35, v4, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v33, 0, v33, s[6:7]
+; GFX7-NEXT: v_writelane_b32 v34, s30, 0
+; GFX7-NEXT: v_writelane_b32 v34, s31, 1
+; GFX7-NEXT: v_writelane_b32 v34, s34, 2
+; GFX7-NEXT: v_writelane_b32 v34, s35, 3
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v35, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v36, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v38, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v35, v37, v6, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v37, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v38, s[4:5]
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX7-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:48
-; GFX7-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:44
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
+; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[31:32]
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX7-NEXT: v_min_f64 v[8:9], v[8:9], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX7-NEXT: v_min_f64 v[10:11], v[10:11], v[31:32]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
-; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 32
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[8:9], v[37:38]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[8:9], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, v38, v9, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v37, v8, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v8, v35, v8, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v36, v9, vcc
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[48:49]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, v37, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v38, s[4:5]
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:68
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:72
-; GFX7-NEXT: v_cndmask_b32_e64 v8, v35, v8, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v49, v11, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v35, v48, v10, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, 0, v35, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v9, v36, v9, s[6:7]
-; GFX7-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80
-; GFX7-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:76
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[12:13], v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, v48, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v49, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[12:13], v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v32, v13, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v31, v12, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[14:15], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, v31, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX7-NEXT: v_min_f64 v[12:13], v[12:13], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX7-NEXT: v_min_f64 v[14:15], v[14:15], v[31:32]
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX7-NEXT: v_min_f64 v[16:17], v[16:17], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX7-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX7-NEXT: v_min_f64 v[18:19], v[18:19], v[31:32]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[14:15], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v50, v34, v15, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v33, v14, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, v33, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v34, s[4:5]
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:96
-; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
-; GFX7-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[16:17], v[37:38]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[16:17], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, v38, v17, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v37, v16, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[18:19], v[35:36]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[18:19], v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e64 v16, v16, v37, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v17, v17, v38, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v50, v36, v19, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v37, v35, v18, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[18:19], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v50, 0, v37, s[6:7]
-; GFX7-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:100
-; GFX7-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104
-; GFX7-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[35:36], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v18, v50, v18, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v19, v51, v19, s[4:5]
-; GFX7-NEXT: s_waitcnt vmcnt(4)
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[20:21], v[31:32]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[20:21], v[31:32]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v18, v35, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v19, v19, v36, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX7-NEXT: v_cndmask_b32_e64 v48, v32, v21, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v31, v20, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v48, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v18, v50, v18, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v19, v51, v19, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[31:32], 32
-; GFX7-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:112
-; GFX7-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:108
-; GFX7-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:120
-; GFX7-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:116
-; GFX7-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[35:36]
-; GFX7-NEXT: s_waitcnt vmcnt(6)
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[22:23], v[33:34]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[22:23], v[33:34]
-; GFX7-NEXT: v_cndmask_b32_e32 v20, v20, v31, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v21, v21, v32, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v34, v23, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v39, v31, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v33, v22, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, 0, v31, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX7-NEXT: v_cndmask_b32_e64 v22, v22, v33, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX7-NEXT: v_min_f64 v[20:21], v[20:21], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX7-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX7-NEXT: v_min_f64 v[22:23], v[22:23], v[31:32]
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX7-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX7-NEXT: v_min_f64 v[24:25], v[24:25], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX7-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX7-NEXT: v_min_f64 v[26:27], v[26:27], v[31:32]
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX7-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v34, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 32
-; GFX7-NEXT: s_waitcnt vmcnt(7)
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[4:5], v[24:25], v[37:38]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[24:25], v[37:38]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v38, v25, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v39, v34, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v37, v24, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v24, v34, v24, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v25, v35, v25, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[37:38], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[34:35]
-; GFX7-NEXT: v_cndmask_b32_e32 v24, v24, v37, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v25, v25, v38, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v24, v34, v24, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v25, v35, v25, s[4:5]
-; GFX7-NEXT: s_waitcnt vmcnt(5)
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[6:7], v[26:27], v[48:49]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[26:27], v[48:49]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 32
-; GFX7-NEXT: s_waitcnt vmcnt(3)
-; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[28:29], v[50:51]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v49, v27, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v48, v26, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[8:9]
-; GFX7-NEXT: v_cmp_lt_f64_e64 s[8:9], v[28:29], v[50:51]
-; GFX7-NEXT: v_cndmask_b32_e32 v26, v34, v26, vcc
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[34:35]
-; GFX7-NEXT: v_cndmask_b32_e32 v27, v35, v27, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, v48, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v49, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v36, v51, v29, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v27, v35, v27, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX7-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
+; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX7-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX7-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX7-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX7-NEXT: v_readlane_b32 s35, v34, 3
+; GFX7-NEXT: v_readlane_b32 s34, v34, 2
+; GFX7-NEXT: v_readlane_b32 s31, v34, 1
+; GFX7-NEXT: v_readlane_b32 s30, v34, 0
+; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_lt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[30:31], v[32:33]
-; GFX7-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v26, v34, v26, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v34, v50, v28, s[8:9]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[6:7], v[28:29], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[10:11]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[50:51], 32
-; GFX7-NEXT: v_cndmask_b32_e32 v36, v33, v31, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v37, v39, v36, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e32 v36, v32, v30, vcc
-; GFX7-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 32
-; GFX7-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[4:5]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[32:33], 32
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[34:35]
-; GFX7-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[36:37]
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[6:7]
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, v50, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v30, v36, v30, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v51, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v30, v36, v30, s[12:13]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v37, v31, s[12:13]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v16f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16
-; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
-; GFX8-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:24
-; GFX8-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:32
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:28
-; GFX8-NEXT: v_mov_b32_e32 v39, 0x7ff80000
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[31:32]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[31:32]
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v48, v32, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v48, v31, v0, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v32, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v34, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v33, v2, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v32, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[6:7]
-; GFX8-NEXT: s_waitcnt vmcnt(2)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v33, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v34, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[35:36], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v36, v5, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v33, v35, v4, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v33, 0, v33, s[6:7]
+; GFX8-NEXT: v_writelane_b32 v34, s30, 0
+; GFX8-NEXT: v_writelane_b32 v34, s31, 1
+; GFX8-NEXT: v_writelane_b32 v34, s34, 2
+; GFX8-NEXT: v_writelane_b32 v34, s35, 3
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v35, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v36, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v38, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v35, v37, v6, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v37, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v38, s[4:5]
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX8-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:48
-; GFX8-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:44
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
+; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[31:32]
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX8-NEXT: v_min_f64 v[8:9], v[8:9], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX8-NEXT: v_min_f64 v[10:11], v[10:11], v[31:32]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
-; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 32
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[8:9], v[37:38]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[8:9], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, v38, v9, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v37, v8, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v8, v35, v8, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v36, v9, vcc
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[48:49]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, v37, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v38, s[4:5]
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:68
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:72
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v35, v8, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v49, v11, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v35, v48, v10, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, 0, v35, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v9, v36, v9, s[6:7]
-; GFX8-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80
-; GFX8-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:76
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[12:13], v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, v48, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v49, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[12:13], v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v32, v13, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v31, v12, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[14:15], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, v31, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX8-NEXT: v_min_f64 v[12:13], v[12:13], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX8-NEXT: v_min_f64 v[14:15], v[14:15], v[31:32]
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX8-NEXT: v_min_f64 v[16:17], v[16:17], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX8-NEXT: v_min_f64 v[18:19], v[18:19], v[31:32]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[14:15], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v50, v34, v15, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v33, v14, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v33, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v34, s[4:5]
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:96
-; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[16:17], v[37:38]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[16:17], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, v38, v17, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v37, v16, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[18:19], v[35:36]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[18:19], v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v37, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v17, v38, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v50, v36, v19, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v37, v35, v18, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[18:19], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v50, 0, v37, s[6:7]
-; GFX8-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:100
-; GFX8-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104
-; GFX8-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[35:36], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v18, v50, v18, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v51, v19, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(4)
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[20:21], v[31:32]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[20:21], v[31:32]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v18, v35, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v19, v19, v36, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX8-NEXT: v_cndmask_b32_e64 v48, v32, v21, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v31, v20, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v48, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v18, v50, v18, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v19, v51, v19, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[31:32], 32
-; GFX8-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:112
-; GFX8-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:108
-; GFX8-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:120
-; GFX8-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:116
-; GFX8-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[35:36]
-; GFX8-NEXT: s_waitcnt vmcnt(6)
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[22:23], v[33:34]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[22:23], v[33:34]
-; GFX8-NEXT: v_cndmask_b32_e32 v20, v20, v31, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v21, v21, v32, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v34, v23, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v39, v31, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v33, v22, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, 0, v31, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, v33, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX8-NEXT: v_min_f64 v[20:21], v[20:21], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX8-NEXT: v_min_f64 v[22:23], v[22:23], v[31:32]
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX8-NEXT: v_min_f64 v[24:25], v[24:25], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX8-NEXT: v_min_f64 v[26:27], v[26:27], v[31:32]
+; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX8-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v34, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 32
-; GFX8-NEXT: s_waitcnt vmcnt(7)
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[4:5], v[24:25], v[37:38]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[24:25], v[37:38]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v38, v25, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v39, v34, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v37, v24, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v24, v34, v24, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v25, v35, v25, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[37:38], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[34:35]
-; GFX8-NEXT: v_cndmask_b32_e32 v24, v24, v37, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v25, v25, v38, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v24, v34, v24, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v25, v35, v25, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(5)
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[6:7], v[26:27], v[48:49]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[26:27], v[48:49]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 32
-; GFX8-NEXT: s_waitcnt vmcnt(3)
-; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[28:29], v[50:51]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v49, v27, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v48, v26, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[8:9]
-; GFX8-NEXT: v_cmp_lt_f64_e64 s[8:9], v[28:29], v[50:51]
-; GFX8-NEXT: v_cndmask_b32_e32 v26, v34, v26, vcc
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[34:35]
-; GFX8-NEXT: v_cndmask_b32_e32 v27, v35, v27, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, v48, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v49, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v36, v51, v29, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v27, v35, v27, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX8-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
+; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX8-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX8-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX8-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX8-NEXT: v_readlane_b32 s35, v34, 3
+; GFX8-NEXT: v_readlane_b32 s34, v34, 2
+; GFX8-NEXT: v_readlane_b32 s31, v34, 1
+; GFX8-NEXT: v_readlane_b32 s30, v34, 0
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_lt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[30:31], v[32:33]
-; GFX8-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v26, v34, v26, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v34, v50, v28, s[8:9]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[28:29], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[10:11]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[8:9], v[50:51], 32
-; GFX8-NEXT: v_cndmask_b32_e32 v36, v33, v31, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v37, v39, v36, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e32 v36, v32, v30, vcc
-; GFX8-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 32
-; GFX8-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[4:5]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[32:33], 32
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[34:35]
-; GFX8-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[36:37]
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, v50, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v30, v36, v30, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v51, s[8:9]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[10:11]
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v36, v30, s[12:13]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v37, v31, s[12:13]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_minimum_v16f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
-; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:24
-; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:20
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:32
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:28
-; GFX9-NEXT: v_mov_b32_e32 v39, 0x7ff80000
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[31:32]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[31:32]
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[2:3], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v48, v32, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v48, v31, v0, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v32, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v34, v3, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v33, v2, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v32, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(2)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v33, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v34, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[35:36], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v36, v5, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v33, v35, v4, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v33, 0, v33, s[6:7]
+; GFX9-NEXT: v_writelane_b32 v34, s30, 0
+; GFX9-NEXT: v_writelane_b32 v34, s31, 1
+; GFX9-NEXT: v_writelane_b32 v34, s34, 2
+; GFX9-NEXT: v_writelane_b32 v34, s35, 3
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v35, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v36, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v38, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v35, v37, v6, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v37, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v38, s[4:5]
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v48, v0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v49, v1, vcc
-; GFX9-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:48
-; GFX9-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:44
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v31, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v32, v3, vcc
+; GFX9-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[31:32]
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX9-NEXT: v_min_f64 v[8:9], v[8:9], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44
+; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX9-NEXT: v_min_f64 v[10:11], v[10:11], v[31:32]
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v33, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v34, v5, vcc
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v35, v6, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v7, v36, v7, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 32
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[8:9], v[37:38]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[8:9], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, v38, v9, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v37, v8, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v8, v35, v8, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v36, v9, vcc
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[48:49]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[10:11], v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v37, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v38, s[4:5]
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:68
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:72
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v35, v8, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v49, v11, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v35, v48, v10, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, 0, v35, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v9, v36, v9, s[6:7]
-; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80
-; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:76
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[12:13], v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v48, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v49, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[12:13], v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v10, v50, v10, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v32, v13, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v11, v51, v11, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v31, v12, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[31:32], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[14:15], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v31, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX9-NEXT: v_min_f64 v[12:13], v[12:13], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60
+; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX9-NEXT: v_min_f64 v[14:15], v[14:15], v[31:32]
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX9-NEXT: v_min_f64 v[16:17], v[16:17], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76
+; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX9-NEXT: v_min_f64 v[18:19], v[18:19], v[31:32]
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[14:15], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v48, v12, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v49, v13, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v50, v34, v15, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v33, v14, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v33, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v34, s[4:5]
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:96
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
-; GFX9-NEXT: v_cndmask_b32_e32 v14, v48, v14, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v15, v49, v15, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[16:17], v[37:38]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[16:17], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, v38, v17, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v37, v16, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v49, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[37:38], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[18:19], v[35:36]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[18:19], v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v37, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v38, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v50, v36, v19, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v37, v35, v18, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[48:49]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[18:19], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v51, v39, v50, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v50, 0, v37, s[6:7]
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:100
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104
-; GFX9-NEXT: v_cndmask_b32_e32 v16, v48, v16, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v17, v49, v17, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[35:36], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v50, v18, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v51, v19, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[20:21], v[31:32]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[20:21], v[31:32]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v18, v35, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v19, v19, v36, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[50:51]
-; GFX9-NEXT: v_cndmask_b32_e64 v48, v32, v21, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v31, v20, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[20:21], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v48, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v35, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v18, v50, v18, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v19, v51, v19, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[31:32], 32
-; GFX9-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:112
-; GFX9-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:108
-; GFX9-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:120
-; GFX9-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:116
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[35:36]
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[22:23], v[33:34]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[22:23], v[33:34]
-; GFX9-NEXT: v_cndmask_b32_e32 v20, v20, v31, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v21, v21, v32, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v35, v20, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v36, v21, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[33:34], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v34, v23, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v39, v31, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v33, v22, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, 0, v31, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[35:36]
-; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, v33, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX9-NEXT: v_min_f64 v[20:21], v[20:21], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92
+; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX9-NEXT: v_min_f64 v[22:23], v[22:23], v[31:32]
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
+; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX9-NEXT: v_min_f64 v[24:25], v[24:25], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108
+; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX9-NEXT: v_min_f64 v[26:27], v[26:27], v[31:32]
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
+; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX9-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX9-NEXT: v_cndmask_b32_e64 v23, v23, v34, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 32
-; GFX9-NEXT: s_waitcnt vmcnt(7)
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[4:5], v[24:25], v[37:38]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[6:7], v[24:25], v[37:38]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v38, v25, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v39, v34, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v37, v24, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v24, v34, v24, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v25, v35, v25, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[37:38], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[4:5], 0, v[34:35]
-; GFX9-NEXT: v_cndmask_b32_e32 v24, v24, v37, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v25, v25, v38, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v24, v34, v24, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v25, v35, v25, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(5)
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[6:7], v[26:27], v[48:49]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[8:9], v[26:27], v[48:49]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[48:49], 32
-; GFX9-NEXT: s_waitcnt vmcnt(3)
-; GFX9-NEXT: v_cmp_o_f64_e64 s[10:11], v[28:29], v[50:51]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v49, v27, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v48, v26, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[8:9]
-; GFX9-NEXT: v_cmp_lt_f64_e64 s[8:9], v[28:29], v[50:51]
-; GFX9-NEXT: v_cndmask_b32_e32 v26, v34, v26, vcc
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[6:7], 0, v[34:35]
-; GFX9-NEXT: v_cndmask_b32_e32 v27, v35, v27, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, v48, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v27, v49, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v36, v51, v29, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v35, v27, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX9-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
+; GFX9-NEXT: v_mov_b32_e32 v32, 0x7ff80000
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11]
+; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13]
+; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15]
+; GFX9-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17]
+; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19]
+; GFX9-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21]
+; GFX9-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23]
+; GFX9-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
+; GFX9-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
+; GFX9-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
+; GFX9-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
+; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
+; GFX9-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
+; GFX9-NEXT: v_readlane_b32 s35, v34, 3
+; GFX9-NEXT: v_readlane_b32 s34, v34, 2
+; GFX9-NEXT: v_readlane_b32 s31, v34, 1
+; GFX9-NEXT: v_readlane_b32 s30, v34, 0
+; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cmp_lt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX9-NEXT: v_cmp_o_f64_e64 s[4:5], v[30:31], v[32:33]
-; GFX9-NEXT: v_cndmask_b32_e64 v35, v39, v36, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v34, v26, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v34, v50, v28, s[8:9]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[6:7], v[28:29], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[10:11]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[8:9], v[50:51], 32
-; GFX9-NEXT: v_cndmask_b32_e32 v36, v33, v31, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v37, v39, v36, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e32 v36, v32, v30, vcc
-; GFX9-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 32
-; GFX9-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[4:5]
-; GFX9-NEXT: v_cmp_class_f64_e64 s[4:5], v[32:33], 32
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[10:11], 0, v[34:35]
-; GFX9-NEXT: v_cmp_eq_f64_e64 s[12:13], 0, v[36:37]
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, v50, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e32 v30, v36, v30, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v29, v51, s[8:9]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[4:5]
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v34, v28, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v35, v29, s[10:11]
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v36, v30, s[12:13]
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v37, v31, s[12:13]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_minimum_v16f64:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
-; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:8
-; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:4
-; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:16
-; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:12
-; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:24
-; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:20
-; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:32
-; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:28
+; GFX940-NEXT: v_accvgpr_write_b32 a1, v40 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a2, v41 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a3, v42 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a4, v43 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a5, v44 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a6, v45 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a7, v46 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a8, v47 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a9, v56 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a10, v57 ; Reload Reuse
+; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:16
+; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:12
+; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:24
+; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:20
+; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:32
+; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:28
+; GFX940-NEXT: scratch_load_dword v57, off, s32 offset:8
+; GFX940-NEXT: scratch_load_dword v56, off, s32 offset:4
+; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:40
+; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:36
+; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:48
+; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:44
+; GFX940-NEXT: scratch_load_dword v43, off, s32 offset:56
+; GFX940-NEXT: scratch_load_dword v42, off, s32 offset:52
+; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:64
+; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:60
+; GFX940-NEXT: scratch_load_dword v55, off, s32 offset:72
+; GFX940-NEXT: scratch_load_dword v54, off, s32 offset:68
+; GFX940-NEXT: scratch_load_dword v53, off, s32 offset:80
+; GFX940-NEXT: scratch_load_dword v52, off, s32 offset:76
+; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:88
+; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:84
+; GFX940-NEXT: scratch_load_dword v35, off, s32 offset:96
+; GFX940-NEXT: scratch_load_dword v34, off, s32 offset:92
; GFX940-NEXT: scratch_load_dword v31, off, s32
-; GFX940-NEXT: scratch_load_dword v33, off, s32 offset:128
-; GFX940-NEXT: scratch_load_dword v32, off, s32 offset:124
-; GFX940-NEXT: scratch_load_dword v35, off, s32 offset:120
-; GFX940-NEXT: scratch_load_dword v34, off, s32 offset:116
-; GFX940-NEXT: scratch_load_dword v43, off, s32 offset:40
-; GFX940-NEXT: scratch_load_dword v42, off, s32 offset:36
+; GFX940-NEXT: scratch_load_dword v33, off, s32 offset:104
+; GFX940-NEXT: scratch_load_dword v32, off, s32 offset:100
+; GFX940-NEXT: v_accvgpr_write_b32 a11, v58 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a12, v59 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a13, v60 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a14, v61 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a15, v62 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_write_b32 a16, v63 ; Reload Reuse
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_min_f64 v[58:59], v[2:3], v[36:37]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[36:37]
; GFX940-NEXT: scratch_load_dword v37, off, s32 offset:112
; GFX940-NEXT: scratch_load_dword v36, off, s32 offset:108
-; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:104
-; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:100
-; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:96
-; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:92
-; GFX940-NEXT: scratch_load_dword v53, off, s32 offset:56
-; GFX940-NEXT: scratch_load_dword v52, off, s32 offset:52
-; GFX940-NEXT: scratch_load_dword v55, off, s32 offset:48
-; GFX940-NEXT: scratch_load_dword v54, off, s32 offset:44
-; GFX940-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse
-; GFX940-NEXT: v_mov_b32_e32 v56, 0x7ff80000
-; GFX940-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_min_f64 v[60:61], v[4:5], v[38:39]
+; GFX940-NEXT: v_cmp_u_f64_e64 s[0:1], v[4:5], v[38:39]
+; GFX940-NEXT: scratch_load_dword v39, off, s32 offset:120
+; GFX940-NEXT: scratch_load_dword v38, off, s32 offset:116
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_min_f64 v[62:63], v[6:7], v[48:49]
+; GFX940-NEXT: v_cmp_u_f64_e64 s[2:3], v[6:7], v[48:49]
+; GFX940-NEXT: scratch_load_dword v49, off, s32 offset:128
+; GFX940-NEXT: scratch_load_dword v48, off, s32 offset:124
+; GFX940-NEXT: s_waitcnt vmcnt(25)
+; GFX940-NEXT: v_min_f64 v[2:3], v[0:1], v[56:57]
+; GFX940-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[56:57]
+; GFX940-NEXT: v_mov_b32_e32 v0, 0x7ff80000
; GFX940-NEXT: s_waitcnt vmcnt(23)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[40:41]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v41, v1, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[0:1], v[40:41]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v57, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v40, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v57, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[40:41], 32
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v58, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v59, v1, vcc
+; GFX940-NEXT: v_min_f64 v[56:57], v[8:9], v[46:47]
+; GFX940-NEXT: v_cndmask_b32_e64 v1, v2, 0, s[4:5]
+; GFX940-NEXT: v_accvgpr_write_b32 a0, v1
+; GFX940-NEXT: v_cndmask_b32_e64 v1, v3, v0, s[4:5]
+; GFX940-NEXT: v_cndmask_b32_e64 v2, v58, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v3, v59, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
; GFX940-NEXT: s_waitcnt vmcnt(21)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e64 v0, v0, v40, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v1, v1, v41, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v51, v3, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[2:3], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e32 v40, v50, v2, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 32
-; GFX940-NEXT: v_cndmask_b32_e64 v61, v56, v57, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v60, 0, v40, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v60, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v61, v3, vcc
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[50:51], 32
-; GFX940-NEXT: scratch_load_dword v41, off, s32 offset:64
-; GFX940-NEXT: scratch_load_dword v40, off, s32 offset:60
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v50, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v51, vcc
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[58:59]
-; GFX940-NEXT: s_waitcnt vmcnt(21)
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[44:45]
-; GFX940-NEXT: scratch_load_dword v51, off, s32 offset:88
-; GFX940-NEXT: v_cndmask_b32_e32 v0, v58, v0, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v1, v59, v1, vcc
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[60:61]
+; GFX940-NEXT: v_min_f64 v[46:47], v[10:11], v[44:45]
+; GFX940-NEXT: v_cndmask_b32_e64 v4, v60, 0, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v9, v57, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
+; GFX940-NEXT: s_waitcnt vmcnt(19)
+; GFX940-NEXT: v_min_f64 v[44:45], v[12:13], v[42:43]
+; GFX940-NEXT: v_cndmask_b32_e64 v5, v61, v0, s[0:1]
+; GFX940-NEXT: v_cndmask_b32_e64 v10, v46, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v11, v47, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
+; GFX940-NEXT: s_waitcnt vmcnt(17)
+; GFX940-NEXT: v_min_f64 v[42:43], v[14:15], v[40:41]
+; GFX940-NEXT: v_cndmask_b32_e64 v6, v62, 0, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v12, v44, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v13, v45, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
+; GFX940-NEXT: s_waitcnt vmcnt(15)
+; GFX940-NEXT: v_min_f64 v[40:41], v[16:17], v[54:55]
+; GFX940-NEXT: v_cndmask_b32_e64 v7, v63, v0, s[2:3]
+; GFX940-NEXT: v_cndmask_b32_e64 v14, v42, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v15, v43, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
+; GFX940-NEXT: s_waitcnt vmcnt(13)
+; GFX940-NEXT: v_min_f64 v[54:55], v[18:19], v[52:53]
+; GFX940-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v16, v40, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v17, v41, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
+; GFX940-NEXT: s_waitcnt vmcnt(11)
+; GFX940-NEXT: v_min_f64 v[52:53], v[20:21], v[50:51]
+; GFX940-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v18, v54, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v19, v55, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
+; GFX940-NEXT: s_waitcnt vmcnt(9)
+; GFX940-NEXT: v_min_f64 v[50:51], v[22:23], v[34:35]
+; GFX940-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v20, v52, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v21, v53, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[34:35]
+; GFX940-NEXT: s_waitcnt vmcnt(6)
+; GFX940-NEXT: v_min_f64 v[34:35], v[24:25], v[32:33]
+; GFX940-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v22, v50, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v23, v51, v0, vcc
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[32:33]
+; GFX940-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse
+; GFX940-NEXT: v_cndmask_b32_e64 v24, v34, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v25, v35, v0, vcc
+; GFX940-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse
+; GFX940-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse
+; GFX940-NEXT: s_waitcnt vmcnt(4)
+; GFX940-NEXT: v_min_f64 v[32:33], v[26:27], v[36:37]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[36:37]
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v2, v60, v2, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v3, v61, v3, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[4:5], v[44:45]
-; GFX940-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v45, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v44, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v50, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[4:5], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[44:45], 32
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v58, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v59, v5, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v4, v4, v44, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v5, v5, v45, s[0:1]
-; GFX940-NEXT: scratch_load_dword v45, off, s32 offset:72
-; GFX940-NEXT: scratch_load_dword v44, off, s32 offset:68
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[58:59]
-; GFX940-NEXT: s_waitcnt vmcnt(22)
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[6:7], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e32 v4, v58, v4, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v5, v59, v5, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[6:7], v[46:47]
+; GFX940-NEXT: v_cndmask_b32_e64 v26, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v27, v33, v0, vcc
+; GFX940-NEXT: s_waitcnt vmcnt(2)
+; GFX940-NEXT: v_min_f64 v[32:33], v[28:29], v[38:39]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[38:39]
; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v47, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v46, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v50, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[6:7], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[46:47], 32
-; GFX940-NEXT: s_nop 0
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v58, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v59, v7, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v6, v6, v46, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v7, v7, v47, s[0:1]
-; GFX940-NEXT: scratch_load_dword v47, off, s32 offset:80
-; GFX940-NEXT: scratch_load_dword v46, off, s32 offset:76
-; GFX940-NEXT: scratch_load_dword v50, off, s32 offset:84
-; GFX940-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[58:59]
-; GFX940-NEXT: s_waitcnt vmcnt(18)
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[8:9], v[42:43]
-; GFX940-NEXT: v_cndmask_b32_e32 v6, v58, v6, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v7, v59, v7, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[8:9], v[42:43]
-; GFX940-NEXT: s_nop 1
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v43, v9, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v59, v56, v57, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v57, v42, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v58, 0, v57, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[8:9], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[42:43], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[58:59]
-; GFX940-NEXT: v_cndmask_b32_e32 v8, v58, v8, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v9, v59, v9, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(8)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[10:11], v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v8, v42, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v9, v43, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v42, v55, v11, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[10:11], v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e64 v8, v58, v8, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v9, v59, v9, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v43, v56, v42, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v42, v54, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v42, 0, v42, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[10:11], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[54:55], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[42:43]
-; GFX940-NEXT: v_cndmask_b32_e32 v10, v42, v10, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v11, v43, v11, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[12:13], v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v10, v54, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v11, v55, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v54, v53, v13, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[12:13], v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e64 v10, v42, v10, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v11, v43, v11, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v55, v56, v54, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v54, v52, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v54, 0, v54, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[12:13], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[52:53], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[54:55]
-; GFX940-NEXT: v_cndmask_b32_e32 v12, v54, v12, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v13, v55, v13, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(6)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[14:15], v[40:41]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v12, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v13, v53, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v41, v15, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[14:15], v[40:41]
-; GFX940-NEXT: v_cndmask_b32_e64 v12, v54, v12, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v13, v55, v13, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v40, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[14:15], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[40:41], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v14, v52, v14, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v14, v40, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v15, v53, v15, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(3)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[16:17], v[44:45]
-; GFX940-NEXT: v_cndmask_b32_e64 v14, v52, v14, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v15, v41, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v45, v17, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[16:17], v[44:45]
-; GFX940-NEXT: v_cndmask_b32_e64 v15, v53, v15, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v44, v16, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[16:17], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[44:45], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v16, v52, v16, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v16, v16, v44, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v17, v53, v17, vcc
-; GFX940-NEXT: s_waitcnt vmcnt(1)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[18:19], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e64 v16, v52, v16, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v17, v45, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v47, v19, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[18:19], v[46:47]
-; GFX940-NEXT: v_cndmask_b32_e64 v17, v53, v17, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v46, v18, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[18:19], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[46:47], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v18, v52, v18, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v18, v18, v46, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v19, v53, v19, vcc
+; GFX940-NEXT: v_cndmask_b32_e64 v28, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v29, v33, v0, vcc
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[20:21], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e64 v18, v52, v18, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v19, v19, v47, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v51, v21, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[20:21], v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e64 v19, v53, v19, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
-; GFX940-NEXT: v_cndmask_b32_e64 v53, v56, v52, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v52, v50, v20, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v52, 0, v52, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[20:21], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[50:51], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[52:53]
-; GFX940-NEXT: v_cndmask_b32_e32 v20, v52, v20, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v21, v53, v21, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[22:23], v[48:49]
-; GFX940-NEXT: v_cndmask_b32_e64 v20, v20, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v21, v21, v51, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v49, v23, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[22:23], v[48:49]
-; GFX940-NEXT: v_cndmask_b32_e64 v20, v52, v20, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v21, v53, v21, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v51, v56, v50, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v50, v48, v22, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v50, 0, v50, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[22:23], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[48:49], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[50:51]
-; GFX940-NEXT: v_cndmask_b32_e32 v22, v50, v22, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v23, v51, v23, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[24:25], v[38:39]
-; GFX940-NEXT: v_cndmask_b32_e64 v22, v22, v48, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v23, v23, v49, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v48, v39, v25, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[24:25], v[38:39]
-; GFX940-NEXT: v_cndmask_b32_e64 v22, v50, v22, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v23, v51, v23, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v49, v56, v48, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v48, v38, v24, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v48, 0, v48, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[24:25], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[38:39], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[48:49]
-; GFX940-NEXT: v_cndmask_b32_e32 v24, v48, v24, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v25, v49, v25, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[26:27], v[36:37]
-; GFX940-NEXT: v_cndmask_b32_e64 v24, v24, v38, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v25, v25, v39, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v38, v37, v27, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[26:27], v[36:37]
-; GFX940-NEXT: v_cndmask_b32_e64 v24, v48, v24, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v25, v49, v25, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v39, v56, v38, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v38, v36, v26, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v38, 0, v38, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[26:27], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[36:37], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[38:39]
-; GFX940-NEXT: v_cndmask_b32_e32 v26, v38, v26, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v27, v39, v27, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[28:29], v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e64 v26, v26, v36, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v27, v27, v37, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v36, v35, v29, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[28:29], v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e64 v26, v38, v26, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v27, v39, v27, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v37, v56, v36, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v36, v34, v28, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v36, 0, v36, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[28:29], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[34:35], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[36:37]
-; GFX940-NEXT: v_cndmask_b32_e32 v28, v36, v28, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v29, v37, v29, vcc
-; GFX940-NEXT: v_cmp_lt_f64_e32 vcc, v[30:31], v[32:33]
-; GFX940-NEXT: v_cndmask_b32_e64 v28, v28, v34, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v29, v29, v35, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v34, v33, v31, vcc
-; GFX940-NEXT: v_cmp_o_f64_e64 s[0:1], v[30:31], v[32:33]
-; GFX940-NEXT: v_cndmask_b32_e64 v28, v36, v28, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v29, v37, v29, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v35, v56, v34, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e32 v34, v32, v30, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v34, 0, v34, s[0:1]
-; GFX940-NEXT: v_cmp_class_f64_e64 vcc, v[30:31], 32
-; GFX940-NEXT: v_cmp_class_f64_e64 s[0:1], v[32:33], 32
-; GFX940-NEXT: v_cmp_eq_f64_e64 s[2:3], 0, v[34:35]
-; GFX940-NEXT: v_cndmask_b32_e32 v30, v34, v30, vcc
-; GFX940-NEXT: v_cndmask_b32_e32 v31, v35, v31, vcc
-; GFX940-NEXT: v_cndmask_b32_e64 v30, v30, v32, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v31, v31, v33, s[0:1]
-; GFX940-NEXT: v_cndmask_b32_e64 v30, v34, v30, s[2:3]
-; GFX940-NEXT: v_cndmask_b32_e64 v31, v35, v31, s[2:3]
-; GFX940-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse
-; GFX940-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse
+; GFX940-NEXT: v_min_f64 v[32:33], v[30:31], v[48:49]
+; GFX940-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[48:49]
+; GFX940-NEXT: s_nop 1
+; GFX940-NEXT: v_cndmask_b32_e64 v30, v32, 0, vcc
+; GFX940-NEXT: v_cndmask_b32_e32 v31, v33, v0, vcc
+; GFX940-NEXT: v_accvgpr_read_b32 v0, a0
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v16f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_clause 0x20
+; GFX10-NEXT: s_clause 0x19
+; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12
+; GFX10-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:24
+; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:20
+; GFX10-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:32
+; GFX10-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:28
+; GFX10-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
+; GFX10-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:68
+; GFX10-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:64
+; GFX10-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:60
+; GFX10-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:56
+; GFX10-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:52
+; GFX10-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:48
+; GFX10-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:44
+; GFX10-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
; GFX10-NEXT: buffer_load_dword v65, off, s[0:3], s32 offset:8
; GFX10-NEXT: buffer_load_dword v64, off, s[0:3], s32 offset:4
-; GFX10-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:16
-; GFX10-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:12
-; GFX10-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:24
-; GFX10-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:20
-; GFX10-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:32
-; GFX10-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:28
-; GFX10-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:36
-; GFX10-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:40
-; GFX10-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:56
-; GFX10-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:52
-; GFX10-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48
-; GFX10-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44
-; GFX10-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
-; GFX10-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
-; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:68
-; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:72
-; GFX10-NEXT: buffer_load_dword v83, off, s[0:3], s32 offset:80
-; GFX10-NEXT: buffer_load_dword v82, off, s[0:3], s32 offset:76
-; GFX10-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:88
-; GFX10-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:84
+; GFX10-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:100
; GFX10-NEXT: buffer_load_dword v69, off, s[0:3], s32 offset:96
; GFX10-NEXT: buffer_load_dword v68, off, s[0:3], s32 offset:92
-; GFX10-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:100
-; GFX10-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:104
-; GFX10-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:112
-; GFX10-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:108
-; GFX10-NEXT: buffer_load_dword v85, off, s[0:3], s32 offset:120
-; GFX10-NEXT: buffer_load_dword v84, off, s[0:3], s32 offset:116
+; GFX10-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:88
+; GFX10-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:84
+; GFX10-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:80
+; GFX10-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:76
+; GFX10-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:72
+; GFX10-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:104
+; GFX10-NEXT: s_waitcnt vmcnt(24)
+; GFX10-NEXT: v_min_f64 v[82:83], v[2:3], v[31:32]
+; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[2:3], v[31:32]
+; GFX10-NEXT: s_waitcnt vmcnt(22)
+; GFX10-NEXT: v_min_f64 v[84:85], v[4:5], v[33:34]
+; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[4:5], v[33:34]
+; GFX10-NEXT: s_clause 0x3
+; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:120
+; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:116
+; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:112
+; GFX10-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:108
+; GFX10-NEXT: s_waitcnt vmcnt(24)
+; GFX10-NEXT: v_min_f64 v[32:33], v[6:7], v[35:36]
+; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[6:7], v[35:36]
+; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX10-NEXT: buffer_load_dword v87, off, s[0:3], s32 offset:128
-; GFX10-NEXT: buffer_load_dword v86, off, s[0:3], s32 offset:124
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[0:1], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[2:3], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s17, v[10:11], 32
-; GFX10-NEXT: s_waitcnt vmcnt(31)
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[64:65]
-; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[0:1], v[64:65]
-; GFX10-NEXT: s_waitcnt vmcnt(29)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s5, v[2:3], v[54:55]
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[2:3], v[54:55]
-; GFX10-NEXT: s_waitcnt vmcnt(27)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s7, v[4:5], v[52:53]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[4:5], v[52:53]
-; GFX10-NEXT: s_waitcnt vmcnt(25)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s9, v[6:7], v[50:51]
-; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[6:7], v[50:51]
+; GFX10-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:128
+; GFX10-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:124
; GFX10-NEXT: s_waitcnt vmcnt(23)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s13, v[8:9], v[48:49]
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[64:65], 32
+; GFX10-NEXT: v_cmp_u_f64_e64 s10, v[14:15], v[50:51]
; GFX10-NEXT: s_waitcnt vmcnt(21)
-; GFX10-NEXT: v_cmp_lt_f64_e64 s15, v[12:13], v[36:37]
-; GFX10-NEXT: s_waitcnt vmcnt(17)
-; GFX10-NEXT: v_cmp_o_f64_e64 s16, v[14:15], v[34:35]
-; GFX10-NEXT: v_cndmask_b32_e32 v96, v64, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v97, v54, v2, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v99, v55, v3, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v100, v52, v4, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v96, 0, v96, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v101, v50, v6, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v98, 0, v97, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v97, v65, v1, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[54:55], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v96, v0, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v99, 0x7ff80000, v99, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v98, v2, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v97, 0x7ff80000, v97, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v100, 0, v100, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v102, 0, v101, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v99, v3, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[6:7], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v97, v1, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[4:5], 32
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[10:11], v[38:39]
-; GFX10-NEXT: v_cndmask_b32_e64 v112, v48, v8, s13
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[12:13], v[36:37]
-; GFX10-NEXT: v_cmp_lt_f64_e64 s6, v[14:15], v[34:35]
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v64, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v65, s14
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[52:53], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v113, v36, v12, s15
+; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[52:53]
+; GFX10-NEXT: s_waitcnt vmcnt(19)
+; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[10:11], v[54:55]
+; GFX10-NEXT: s_waitcnt vmcnt(18)
+; GFX10-NEXT: v_min_f64 v[34:35], v[8:9], v[37:38]
+; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[8:9], v[37:38]
+; GFX10-NEXT: s_waitcnt vmcnt(16)
+; GFX10-NEXT: v_min_f64 v[8:9], v[0:1], v[64:65]
+; GFX10-NEXT: v_min_f64 v[36:37], v[10:11], v[54:55]
+; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[0:1], v[64:65]
+; GFX10-NEXT: v_min_f64 v[38:39], v[12:13], v[52:53]
+; GFX10-NEXT: v_min_f64 v[52:53], v[14:15], v[50:51]
+; GFX10-NEXT: s_waitcnt vmcnt(11)
+; GFX10-NEXT: v_min_f64 v[54:55], v[20:21], v[70:71]
+; GFX10-NEXT: v_cmp_u_f64_e64 s13, v[20:21], v[70:71]
+; GFX10-NEXT: s_waitcnt vmcnt(9)
+; GFX10-NEXT: v_cmp_u_f64_e64 s12, v[18:19], v[80:81]
+; GFX10-NEXT: s_waitcnt vmcnt(8)
+; GFX10-NEXT: v_min_f64 v[50:51], v[16:17], v[48:49]
+; GFX10-NEXT: v_cmp_u_f64_e64 s11, v[16:17], v[48:49]
+; GFX10-NEXT: v_min_f64 v[48:49], v[18:19], v[80:81]
+; GFX10-NEXT: v_min_f64 v[64:65], v[22:23], v[68:69]
+; GFX10-NEXT: v_cmp_u_f64_e64 s14, v[22:23], v[68:69]
+; GFX10-NEXT: s_waitcnt vmcnt(7)
+; GFX10-NEXT: v_min_f64 v[68:69], v[24:25], v[66:67]
+; GFX10-NEXT: v_cmp_u_f64_e64 s15, v[24:25], v[66:67]
+; GFX10-NEXT: v_cndmask_b32_e64 v10, v36, 0, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v8, v34, 0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v9, v35, 0x7ff80000, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v11, v37, 0x7ff80000, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v12, v38, 0, s9
+; GFX10-NEXT: v_cndmask_b32_e64 v13, v39, 0x7ff80000, s9
+; GFX10-NEXT: v_cndmask_b32_e64 v14, v52, 0, s10
+; GFX10-NEXT: v_cndmask_b32_e64 v15, v53, 0x7ff80000, s10
+; GFX10-NEXT: v_cndmask_b32_e64 v16, v50, 0, s11
+; GFX10-NEXT: v_cndmask_b32_e64 v17, v51, 0x7ff80000, s11
+; GFX10-NEXT: v_cndmask_b32_e64 v18, v48, 0, s12
+; GFX10-NEXT: v_cndmask_b32_e64 v19, v49, 0x7ff80000, s12
+; GFX10-NEXT: v_cndmask_b32_e64 v20, v54, 0, s13
+; GFX10-NEXT: v_cndmask_b32_e64 v21, v55, 0x7ff80000, s13
+; GFX10-NEXT: v_cndmask_b32_e64 v22, v64, 0, s14
+; GFX10-NEXT: v_cndmask_b32_e64 v23, v65, 0x7ff80000, s14
+; GFX10-NEXT: v_cndmask_b32_e64 v24, v68, 0, s15
+; GFX10-NEXT: v_cndmask_b32_e64 v25, v69, 0x7ff80000, s15
+; GFX10-NEXT: s_waitcnt vmcnt(5)
+; GFX10-NEXT: v_min_f64 v[70:71], v[28:29], v[2:3]
+; GFX10-NEXT: v_cmp_u_f64_e64 s17, v[28:29], v[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(3)
+; GFX10-NEXT: v_min_f64 v[66:67], v[26:27], v[4:5]
+; GFX10-NEXT: v_cmp_u_f64_e64 s16, v[26:27], v[4:5]
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v82, 0, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_o_f64_e64 s18, v[30:31], v[86:87]
-; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v54, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v54, v53, v5, s7
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v55, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[50:51], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v55, v51, v7, s9
-; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[8:9], v[48:49]
-; GFX10-NEXT: v_cndmask_b32_e64 v101, 0x7ff80000, v54, s8
-; GFX10-NEXT: v_cmp_lt_f64_e64 s7, v[16:17], v[32:33]
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v102, v6, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v103, 0x7ff80000, v55, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v100, v4, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v101, v5, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[8:9], 32
-; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[10:11], v[38:39]
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v103, v7, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[48:49], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v114, v38, v10, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v115, v34, v14, s6
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[16:17], v[32:33]
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v52, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v53, s14
-; GFX10-NEXT: v_cmp_lt_f64_e64 s14, v[18:19], v[82:83]
-; GFX10-NEXT: v_cndmask_b32_e64 v52, 0, v115, s16
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v50, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v50, v49, v9, s13
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v51, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[38:39], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v54, 0, v112, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v51, v39, v11, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v55, 0x7ff80000, v50, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v50, 0, v113, s5
-; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[18:19], v[82:83]
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v54, v8, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v64, 0, v114, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v55, v9, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[12:13], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v65, 0x7ff80000, v51, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v48, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v64, v10, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v49, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[14:15], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v65, v11, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v48, v37, v13, s15
-; GFX10-NEXT: v_cmp_class_f64_e64 s17, v[34:35], 32
-; GFX10-NEXT: v_cmp_lt_f64_e64 s9, v[20:21], v[66:67]
-; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[20:21], v[66:67]
-; GFX10-NEXT: v_cndmask_b32_e64 v116, v32, v16, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v51, 0x7ff80000, v48, s5
-; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v38, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v39, vcc_lo
-; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[22:23], v[68:69]
-; GFX10-NEXT: v_cndmask_b32_e64 v38, v35, v15, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v49, v82, v18, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v48, 0, v116, s8
-; GFX10-NEXT: v_cmp_class_f64_e64 s13, v[36:37], 32
-; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[22:23], v[68:69]
-; GFX10-NEXT: v_cndmask_b32_e64 v53, 0x7ff80000, v38, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v50, v12, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v51, v13, s10
-; GFX10-NEXT: v_cmp_class_f64_e64 s10, v[16:17], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v38, 0, v49, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v112, v83, v19, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v52, v14, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v53, v15, s12
-; GFX10-NEXT: v_cmp_class_f64_e64 s12, v[32:33], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s14, v[18:19], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v114, v67, v21, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v34, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v35, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v34, v33, v17, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v39, 0x7ff80000, v112, s4
-; GFX10-NEXT: v_cmp_lt_f64_e64 s4, v[24:25], v[70:71]
-; GFX10-NEXT: v_cndmask_b32_e32 v113, v69, v23, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v35, v68, v22, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[20:21], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v49, 0x7ff80000, v34, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v34, 0x7ff80000, v114, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v36, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v37, s13
-; GFX10-NEXT: v_cmp_class_f64_e64 s13, v[82:83], 32
-; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[24:25], v[70:71]
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v48, v16, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v49, v17, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v36, 0x7ff80000, v113, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v35, 0, v35, s5
-; GFX10-NEXT: v_cmp_lt_f64_e64 s7, v[26:27], v[80:81]
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, v32, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v32, v66, v20, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v17, v33, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v38, v18, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v39, v19, s14
-; GFX10-NEXT: v_cmp_o_f64_e64 s15, v[26:27], v[80:81]
-; GFX10-NEXT: v_cndmask_b32_e64 v33, 0, v32, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v112, v71, v25, s4
-; GFX10-NEXT: v_cmp_lt_f64_e64 s16, v[28:29], v[84:85]
-; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[28:29], v[84:85]
-; GFX10-NEXT: v_cndmask_b32_e32 v21, v34, v21, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v20, v33, v20, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[22:23], 32
-; GFX10-NEXT: v_cmp_lt_f64_e64 s17, v[30:31], v[86:87]
-; GFX10-NEXT: v_cmp_class_f64_e64 s5, v[70:71], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v18, v82, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v82, v70, v24, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v19, v83, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v83, 0x7ff80000, v112, s6
-; GFX10-NEXT: v_cmp_class_f64_e64 s4, v[68:69], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s9, 0, v[96:97]
-; GFX10-NEXT: v_cndmask_b32_e64 v82, 0, v82, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v37, v81, v27, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v32, v80, v26, s7
-; GFX10-NEXT: v_cmp_class_f64_e64 s6, v[80:81], 32
-; GFX10-NEXT: v_cmp_class_f64_e64 s7, v[84:85], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s10, 0, v[98:99]
-; GFX10-NEXT: v_cndmask_b32_e64 v113, 0x7ff80000, v37, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v112, 0, v32, s15
-; GFX10-NEXT: v_cmp_eq_f64_e64 s11, 0, v[100:101]
-; GFX10-NEXT: v_cndmask_b32_e64 v115, v85, v29, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v114, v84, v28, s16
-; GFX10-NEXT: v_cmp_eq_f64_e64 s12, 0, v[102:103]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s13, 0, v[54:55]
-; GFX10-NEXT: v_cndmask_b32_e32 v22, v35, v22, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v23, v36, v23, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[24:25], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v115, 0x7ff80000, v115, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v114, 0, v114, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v116, v87, v31, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v32, v86, v30, s17
-; GFX10-NEXT: v_cmp_class_f64_e64 s8, v[86:87], 32
-; GFX10-NEXT: v_cmp_eq_f64_e64 s14, 0, v[64:65]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s15, 0, v[50:51]
-; GFX10-NEXT: v_cndmask_b32_e64 v117, 0x7ff80000, v116, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v116, 0, v32, s18
-; GFX10-NEXT: v_cmp_eq_f64_e64 s16, 0, v[52:53]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s17, 0, v[48:49]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s18, 0, v[38:39]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s19, 0, v[33:34]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s20, 0, v[35:36]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s21, 0, v[82:83]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s22, 0, v[112:113]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s23, 0, v[114:115]
-; GFX10-NEXT: v_cmp_eq_f64_e64 s24, 0, v[116:117]
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v22, v68, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v23, v69, s4
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v96, v0, s9
-; GFX10-NEXT: v_cndmask_b32_e32 v24, v82, v24, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v25, v83, v25, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[26:27], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v98, v2, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v100, v4, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v24, v24, v70, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v25, v25, v71, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v102, v6, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v54, v8, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v10, v64, v10, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v12, v50, v12, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v14, v52, v14, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v16, v48, v16, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v18, v38, v18, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v22, v35, v22, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v24, v82, v24, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v97, v1, s9
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v99, v3, s10
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v101, v5, s11
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v103, v7, s12
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v55, v9, s13
-; GFX10-NEXT: v_cndmask_b32_e64 v11, v65, v11, s14
-; GFX10-NEXT: v_cndmask_b32_e64 v13, v51, v13, s15
-; GFX10-NEXT: v_cndmask_b32_e64 v15, v53, v15, s16
-; GFX10-NEXT: v_cndmask_b32_e32 v26, v112, v26, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v27, v113, v27, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[28:29], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v49, v17, s17
-; GFX10-NEXT: v_cndmask_b32_e64 v19, v39, v19, s18
-; GFX10-NEXT: v_cndmask_b32_e64 v26, v26, v80, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v27, v27, v81, s6
-; GFX10-NEXT: v_cndmask_b32_e64 v23, v36, v23, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v25, v83, v25, s21
-; GFX10-NEXT: v_cndmask_b32_e64 v26, v112, v26, s22
-; GFX10-NEXT: v_cndmask_b32_e64 v27, v113, v27, s22
-; GFX10-NEXT: v_cndmask_b32_e32 v28, v114, v28, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v29, v115, v29, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[30:31], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v28, v28, v84, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v29, v29, v85, s7
-; GFX10-NEXT: v_cndmask_b32_e64 v28, v114, v28, s23
-; GFX10-NEXT: v_cndmask_b32_e64 v29, v115, v29, s23
-; GFX10-NEXT: v_cndmask_b32_e32 v30, v116, v30, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v31, v117, v31, vcc_lo
-; GFX10-NEXT: v_cmp_class_f64_e64 vcc_lo, v[66:67], 32
-; GFX10-NEXT: v_cndmask_b32_e64 v30, v30, v86, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v31, v31, v87, s8
-; GFX10-NEXT: v_cndmask_b32_e64 v30, v116, v30, s24
-; GFX10-NEXT: v_cndmask_b32_e64 v31, v117, v31, s24
-; GFX10-NEXT: v_cndmask_b32_e32 v20, v20, v66, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v21, v21, v67, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v20, v33, v20, s19
-; GFX10-NEXT: v_cndmask_b32_e64 v21, v34, v21, s19
+; GFX10-NEXT: v_min_f64 v[80:81], v[30:31], v[6:7]
+; GFX10-NEXT: v_cmp_u_f64_e64 s18, v[30:31], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v83, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v84, 0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v85, 0x7ff80000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v32, 0, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v33, 0x7ff80000, s5
+; GFX10-NEXT: v_cndmask_b32_e64 v28, v70, 0, s17
+; GFX10-NEXT: v_cndmask_b32_e64 v29, v71, 0x7ff80000, s17
+; GFX10-NEXT: v_cndmask_b32_e64 v26, v66, 0, s16
+; GFX10-NEXT: v_cndmask_b32_e64 v27, v67, 0x7ff80000, s16
+; GFX10-NEXT: v_cndmask_b32_e64 v30, v80, 0, s18
+; GFX10-NEXT: v_cndmask_b32_e64 v31, v81, 0x7ff80000, s18
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_minimum_v16f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1f
-; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:8
-; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:4
-; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:16
-; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:12
-; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:24
-; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:20
-; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:32
-; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:28
-; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:40
-; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:36
-; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:48
-; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:44
-; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:56
-; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:52
-; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:64
-; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:60
-; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:72
-; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:68
-; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:80
-; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:76
-; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:88
-; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:84
-; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:96
-; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:92
-; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:104
-; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:100
-; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:112
-; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:108
-; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:120
-; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:116
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:128
-; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:124
-; GFX11-NEXT: s_waitcnt vmcnt(31)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s9, v[0:1], v[86:87]
-; GFX11-NEXT: v_cmp_o_f64_e64 s11, v[0:1], v[86:87]
-; GFX11-NEXT: s_waitcnt vmcnt(29)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s10, v[2:3], v[84:85]
-; GFX11-NEXT: v_cmp_class_f64_e64 s14, v[86:87], 32
-; GFX11-NEXT: s_waitcnt vmcnt(27)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s0, v[4:5], v[32:33]
-; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[4:5], v[32:33]
-; GFX11-NEXT: s_waitcnt vmcnt(25)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s2, v[6:7], v[34:35]
-; GFX11-NEXT: v_cmp_o_f64_e64 s12, v[2:3], v[84:85]
-; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[6:7], v[34:35]
-; GFX11-NEXT: s_waitcnt vmcnt(23)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s4, v[8:9], v[36:37]
-; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[8:9], v[36:37]
-; GFX11-NEXT: v_cmp_class_f64_e64 s16, v[84:85], 32
-; GFX11-NEXT: s_waitcnt vmcnt(21)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s6, v[10:11], v[38:39]
-; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[10:11], v[38:39]
-; GFX11-NEXT: s_waitcnt vmcnt(19)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s8, v[12:13], v[48:49]
-; GFX11-NEXT: v_cmp_o_f64_e64 s7, v[12:13], v[48:49]
-; GFX11-NEXT: s_waitcnt vmcnt(17)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s13, v[14:15], v[50:51]
-; GFX11-NEXT: s_waitcnt vmcnt(15)
-; GFX11-NEXT: v_cmp_o_f64_e64 s15, v[16:17], v[52:53]
-; GFX11-NEXT: s_waitcnt vmcnt(13)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s17, v[18:19], v[54:55]
-; GFX11-NEXT: v_cmp_o_f64_e64 s18, v[18:19], v[54:55]
-; GFX11-NEXT: s_waitcnt vmcnt(11)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s19, v[20:21], v[64:65]
-; GFX11-NEXT: v_cmp_o_f64_e64 s20, v[20:21], v[64:65]
-; GFX11-NEXT: s_waitcnt vmcnt(9)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s21, v[22:23], v[66:67]
-; GFX11-NEXT: v_cmp_o_f64_e64 s22, v[22:23], v[66:67]
-; GFX11-NEXT: s_waitcnt vmcnt(7)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s23, v[24:25], v[68:69]
-; GFX11-NEXT: v_cmp_o_f64_e64 s24, v[24:25], v[68:69]
-; GFX11-NEXT: s_waitcnt vmcnt(5)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s25, v[26:27], v[70:71]
-; GFX11-NEXT: v_cmp_o_f64_e64 s26, v[26:27], v[70:71]
-; GFX11-NEXT: s_waitcnt vmcnt(3)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s27, v[28:29], v[80:81]
-; GFX11-NEXT: v_cmp_o_f64_e64 s28, v[28:29], v[80:81]
+; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
+; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
+; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
+; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
+; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:24
+; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:20
+; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:32
+; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:28
+; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:40
+; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:36
+; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:48
+; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:44
+; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:56
+; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:52
+; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:64
+; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:60
+; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:72
+; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:68
+; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:80
+; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:76
+; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:88
+; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:84
+; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:96
+; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:92
+; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:104
+; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:100
+; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:112
+; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:108
+; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:120
+; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:116
+; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:128
+; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:124
+; GFX11-NEXT: s_waitcnt vmcnt(30)
+; GFX11-NEXT: v_min_f64 v[96:97], v[0:1], v[32:33]
+; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[32:33]
+; GFX11-NEXT: s_waitcnt vmcnt(28)
+; GFX11-NEXT: v_min_f64 v[32:33], v[2:3], v[34:35]
+; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[34:35]
+; GFX11-NEXT: s_waitcnt vmcnt(26)
+; GFX11-NEXT: v_min_f64 v[34:35], v[4:5], v[36:37]
+; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[36:37]
+; GFX11-NEXT: s_waitcnt vmcnt(24)
+; GFX11-NEXT: v_min_f64 v[36:37], v[6:7], v[38:39]
+; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[38:39]
+; GFX11-NEXT: s_waitcnt vmcnt(22)
+; GFX11-NEXT: v_min_f64 v[38:39], v[8:9], v[48:49]
+; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[48:49]
+; GFX11-NEXT: s_waitcnt vmcnt(20)
+; GFX11-NEXT: v_min_f64 v[48:49], v[10:11], v[50:51]
+; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[50:51]
+; GFX11-NEXT: s_waitcnt vmcnt(18)
+; GFX11-NEXT: v_min_f64 v[50:51], v[12:13], v[52:53]
+; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[52:53]
+; GFX11-NEXT: s_waitcnt vmcnt(16)
+; GFX11-NEXT: v_min_f64 v[52:53], v[14:15], v[54:55]
+; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[54:55]
+; GFX11-NEXT: s_waitcnt vmcnt(14)
+; GFX11-NEXT: v_min_f64 v[54:55], v[16:17], v[64:65]
+; GFX11-NEXT: v_cmp_u_f64_e64 s7, v[16:17], v[64:65]
+; GFX11-NEXT: s_waitcnt vmcnt(12)
+; GFX11-NEXT: v_min_f64 v[64:65], v[18:19], v[66:67]
+; GFX11-NEXT: v_cmp_u_f64_e64 s8, v[18:19], v[66:67]
+; GFX11-NEXT: s_waitcnt vmcnt(10)
+; GFX11-NEXT: v_min_f64 v[66:67], v[20:21], v[68:69]
+; GFX11-NEXT: v_cmp_u_f64_e64 s9, v[20:21], v[68:69]
+; GFX11-NEXT: s_waitcnt vmcnt(8)
+; GFX11-NEXT: v_min_f64 v[68:69], v[22:23], v[70:71]
+; GFX11-NEXT: v_cmp_u_f64_e64 s10, v[22:23], v[70:71]
+; GFX11-NEXT: s_waitcnt vmcnt(6)
+; GFX11-NEXT: v_min_f64 v[70:71], v[24:25], v[80:81]
+; GFX11-NEXT: v_cmp_u_f64_e64 s11, v[24:25], v[80:81]
+; GFX11-NEXT: s_waitcnt vmcnt(4)
+; GFX11-NEXT: v_min_f64 v[80:81], v[26:27], v[82:83]
+; GFX11-NEXT: v_cmp_u_f64_e64 s12, v[26:27], v[82:83]
+; GFX11-NEXT: s_waitcnt vmcnt(2)
+; GFX11-NEXT: v_min_f64 v[82:83], v[28:29], v[84:85]
+; GFX11-NEXT: v_cmp_u_f64_e64 s13, v[28:29], v[84:85]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_lt_f64_e64 s29, v[30:31], v[82:83]
-; GFX11-NEXT: v_cmp_o_f64_e64 vcc_hi, v[30:31], v[82:83]
-; GFX11-NEXT: v_cndmask_b32_e64 v96, v87, v1, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v101, v86, v0, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v98, v85, v3, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v103, v84, v2, s10
-; GFX11-NEXT: v_cmp_class_f64_e64 s10, v[0:1], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v97, 0x7ff80000, v96, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v96, 0, v101, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v100, v33, v5, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v102, v35, v7, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v99, 0x7ff80000, v98, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v98, 0, v103, s12
-; GFX11-NEXT: v_cmp_class_f64_e64 s11, v[2:3], 32
-; GFX11-NEXT: v_cndmask_b32_e32 v101, 0x7ff80000, v100, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v103, 0x7ff80000, v102, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v112, v37, v9, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v114, v39, v11, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v116, v49, v13, s8
-; GFX11-NEXT: v_cmp_o_f64_e64 s9, v[14:15], v[50:51]
-; GFX11-NEXT: v_cndmask_b32_e64 v118, v51, v15, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v113, 0x7ff80000, v112, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v115, 0x7ff80000, v114, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v117, 0x7ff80000, v116, s7
-; GFX11-NEXT: v_cmp_lt_f64_e64 s12, v[16:17], v[52:53]
-; GFX11-NEXT: v_cndmask_b32_e64 v130, v55, v19, s17
-; GFX11-NEXT: v_cndmask_b32_e64 v132, v65, v21, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v134, v67, v23, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v144, v69, v25, s23
-; GFX11-NEXT: v_cndmask_b32_e64 v145, v71, v27, s25
-; GFX11-NEXT: v_cndmask_b32_e64 v131, 0x7ff80000, v130, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v133, 0x7ff80000, v132, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v135, 0x7ff80000, v134, s22
-; GFX11-NEXT: v_cndmask_b32_e64 v146, v81, v29, s27
-; GFX11-NEXT: v_cndmask_b32_e64 v148, v80, v28, s27
-; GFX11-NEXT: v_cndmask_b32_e64 v147, v83, v31, s29
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v147, 0x7ff80000, v147, vcc_hi
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, v0, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, v1, s10
-; GFX11-NEXT: v_cmp_class_f64_e64 s10, v[36:37], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v86, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v32, v4, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v87, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v34, v6, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v98, v2, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v99, v3, s11
-; GFX11-NEXT: v_cndmask_b32_e32 v100, 0, v86, vcc_lo
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[4:5], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v102, 0, v87, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v84, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v84, v36, v8, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v38, v10, s6
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v48, v12, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v119, 0x7ff80000, v118, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v128, v53, v17, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v112, 0, v84, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v114, 0, v86, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v116, 0, v87, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v84, v50, v14, s13
-; GFX11-NEXT: v_cndmask_b32_e64 v129, 0x7ff80000, v128, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v52, v16, s12
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v54, v18, s17
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v85, s16
-; GFX11-NEXT: v_cndmask_b32_e64 v118, 0, v84, s9
-; GFX11-NEXT: v_cndmask_b32_e64 v84, v64, v20, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v128, 0, v86, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v130, 0, v87, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v86, v66, v22, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v85, 0x7ff80000, v144, s24
-; GFX11-NEXT: v_cndmask_b32_e64 v132, 0, v84, s20
-; GFX11-NEXT: v_cndmask_b32_e64 v87, v68, v24, s23
-; GFX11-NEXT: v_cndmask_b32_e64 v144, v70, v26, s25
-; GFX11-NEXT: v_cndmask_b32_e64 v134, 0, v86, s22
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[68:69], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[70:71], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v84, 0, v87, s24
-; GFX11-NEXT: v_cndmask_b32_e64 v87, 0x7ff80000, v145, s26
-; GFX11-NEXT: v_cndmask_b32_e64 v86, 0, v144, s26
-; GFX11-NEXT: v_cndmask_b32_e64 v145, 0x7ff80000, v146, s28
-; GFX11-NEXT: v_cndmask_b32_e64 v144, 0, v148, s28
-; GFX11-NEXT: v_cndmask_b32_e64 v146, v82, v30, s29
-; GFX11-NEXT: v_cmp_class_f64_e64 s2, v[80:81], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s3, v[82:83], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s6, v[32:33], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s8, v[34:35], 32
-; GFX11-NEXT: v_dual_cndmask_b32 v5, v101, v5 :: v_dual_cndmask_b32 v4, v100, v4
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[6:7], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v146, 0, v146, vcc_hi
-; GFX11-NEXT: v_cmp_class_f64_e64 s12, v[38:39], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s14, v[48:49], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s16, v[50:51], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s18, v[52:53], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s20, v[54:55], 32
-; GFX11-NEXT: v_cmp_class_f64_e64 s21, v[64:65], 32
-; GFX11-NEXT: v_cmp_eq_f64_e64 s4, 0, v[96:97]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s5, 0, v[98:99]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s7, 0, v[100:101]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s9, 0, v[102:103]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s11, 0, v[112:113]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s13, 0, v[114:115]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s15, 0, v[116:117]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s17, 0, v[118:119]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s19, 0, v[128:129]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s22, 0, v[130:131]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s23, 0, v[132:133]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s24, 0, v[134:135]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s25, 0, v[84:85]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s26, 0, v[86:87]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s27, 0, v[144:145]
-; GFX11-NEXT: v_cmp_eq_f64_e64 s28, 0, v[146:147]
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v33, s6
-; GFX11-NEXT: v_dual_cndmask_b32 v7, v103, v7 :: v_dual_cndmask_b32 v6, v102, v6
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[8:9], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v32, s6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v35, s8
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, v0, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v98, v2, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v100, v4, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, v1, s4
-; GFX11-NEXT: v_cndmask_b32_e64 v3, v99, v3, s5
-; GFX11-NEXT: v_cndmask_b32_e64 v5, v101, v5, s7
-; GFX11-NEXT: v_cndmask_b32_e64 v7, v103, v7, s9
-; GFX11-NEXT: v_dual_cndmask_b32 v9, v113, v9 :: v_dual_cndmask_b32 v8, v112, v8
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[10:11], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v34, s8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v37, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v6, v102, v6, s9
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v9, v113, v9, s11
-; GFX11-NEXT: v_dual_cndmask_b32 v11, v115, v11 :: v_dual_cndmask_b32 v10, v114, v10
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[12:13], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v36, s10
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v39, s12
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v112, v8, s11
-; GFX11-NEXT: v_cndmask_b32_e64 v11, v115, v11, s13
-; GFX11-NEXT: v_dual_cndmask_b32 v13, v117, v13 :: v_dual_cndmask_b32 v12, v116, v12
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[14:15], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v38, s12
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v49, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v10, v114, v10, s13
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v13, v117, v13, s15
-; GFX11-NEXT: v_dual_cndmask_b32 v15, v119, v15 :: v_dual_cndmask_b32 v14, v118, v14
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[16:17], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, v48, s14
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, v51, s16
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v12, v116, v12, s15
-; GFX11-NEXT: v_cndmask_b32_e64 v15, v119, v15, s17
-; GFX11-NEXT: v_dual_cndmask_b32 v17, v129, v17 :: v_dual_cndmask_b32 v16, v128, v16
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[18:19], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v50, s16
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v17, v17, v53, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v14, v118, v14, s17
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v17, v129, v17, s19
-; GFX11-NEXT: v_dual_cndmask_b32 v19, v131, v19 :: v_dual_cndmask_b32 v18, v130, v18
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[20:21], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v16, v52, s18
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v19, v55, s20
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v16, v128, v16, s19
-; GFX11-NEXT: v_cndmask_b32_e64 v19, v131, v19, s22
-; GFX11-NEXT: v_dual_cndmask_b32 v21, v133, v21 :: v_dual_cndmask_b32 v20, v132, v20
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[22:23], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v18, v54, s20
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v21, v21, v65, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v18, v130, v18, s22
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v21, v133, v21, s23
-; GFX11-NEXT: v_dual_cndmask_b32 v23, v135, v23 :: v_dual_cndmask_b32 v22, v134, v22
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[24:25], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v20, v64, s21
-; GFX11-NEXT: v_cndmask_b32_e64 v20, v132, v20, s23
-; GFX11-NEXT: v_dual_cndmask_b32 v25, v85, v25 :: v_dual_cndmask_b32 v24, v84, v24
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[26:27], 32
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v25, v25, v69, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v25, v85, v25, s25
-; GFX11-NEXT: v_dual_cndmask_b32 v27, v87, v27 :: v_dual_cndmask_b32 v26, v86, v26
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[28:29], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v24, v24, v68, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v27, v27, v71, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v24, v84, v24, s25
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v27, v87, v27, s26
-; GFX11-NEXT: v_dual_cndmask_b32 v29, v145, v29 :: v_dual_cndmask_b32 v28, v144, v28
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[30:31], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v26, v26, v70, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v29, v29, v81, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v26, v86, v26, s26
-; GFX11-NEXT: v_cndmask_b32_e64 v29, v145, v29, s27
-; GFX11-NEXT: v_dual_cndmask_b32 v31, v147, v31 :: v_dual_cndmask_b32 v30, v146, v30
-; GFX11-NEXT: v_cmp_class_f64_e64 vcc_lo, v[66:67], 32
-; GFX11-NEXT: v_cndmask_b32_e64 v28, v28, v80, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v31, v31, v83, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v28, v144, v28, s27
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v31, v147, v31, s28
-; GFX11-NEXT: v_dual_cndmask_b32 v23, v23, v67 :: v_dual_cndmask_b32 v22, v22, v66
-; GFX11-NEXT: v_cndmask_b32_e64 v30, v30, v82, s3
-; GFX11-NEXT: v_cndmask_b32_e64 v23, v135, v23, s24
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v22, v134, v22, s24
-; GFX11-NEXT: v_cndmask_b32_e64 v30, v146, v30, s28
+; GFX11-NEXT: v_min_f64 v[84:85], v[30:31], v[86:87]
+; GFX11-NEXT: v_cmp_u_f64_e64 s14, v[30:31], v[86:87]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, v32, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v3, v33, 0x7ff80000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v34, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v5, v35, 0x7ff80000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v6, v36, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v7, v37, 0x7ff80000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v8, v38, 0, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v9, v39, 0x7ff80000, s3
+; GFX11-NEXT: v_cndmask_b32_e64 v10, v48, 0, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v11, v49, 0x7ff80000, s4
+; GFX11-NEXT: v_cndmask_b32_e64 v12, v50, 0, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v13, v51, 0x7ff80000, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v14, v52, 0, s6
+; GFX11-NEXT: v_cndmask_b32_e64 v15, v53, 0x7ff80000, s6
+; GFX11-NEXT: v_cndmask_b32_e64 v16, v54, 0, s7
+; GFX11-NEXT: v_cndmask_b32_e64 v17, v55, 0x7ff80000, s7
+; GFX11-NEXT: v_cndmask_b32_e64 v18, v64, 0, s8
+; GFX11-NEXT: v_cndmask_b32_e64 v19, v65, 0x7ff80000, s8
+; GFX11-NEXT: v_cndmask_b32_e64 v20, v66, 0, s9
+; GFX11-NEXT: v_cndmask_b32_e64 v21, v67, 0x7ff80000, s9
+; GFX11-NEXT: v_cndmask_b32_e64 v22, v68, 0, s10
+; GFX11-NEXT: v_cndmask_b32_e64 v23, v69, 0x7ff80000, s10
+; GFX11-NEXT: v_cndmask_b32_e64 v24, v70, 0, s11
+; GFX11-NEXT: v_cndmask_b32_e64 v25, v71, 0x7ff80000, s11
+; GFX11-NEXT: v_cndmask_b32_e64 v26, v80, 0, s12
+; GFX11-NEXT: v_cndmask_b32_e64 v27, v81, 0x7ff80000, s12
+; GFX11-NEXT: v_cndmask_b32_e64 v28, v82, 0, s13
+; GFX11-NEXT: v_cndmask_b32_e64 v29, v83, 0x7ff80000, s13
+; GFX11-NEXT: v_cndmask_b32_e64 v30, v84, 0, s14
+; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_minimum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll b/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
index d87eb9711488..e0ccda1cbb9e 100644
--- a/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
+++ b/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
@@ -13,6 +13,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W64,GFX1030,GFX1030W64 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1151 < %s | FileCheck --check-prefixes=GCN,GFX1100,GFX1100W32 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1151 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX1100,GFX1100W64 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1152 < %s | FileCheck --check-prefixes=GCN,GFX1030,GFX1030W32 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1152 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX1030,GFX1030W64 %s
; GCN-LABEL: {{^}}max_occupancy:
; GFX9: ; Occupancy: 10
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
index 4ecce2842455..6dda1fe1f39d 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
@@ -282,3 +282,168 @@ body: |
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
FLAT_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec, implicit $flat_scr
...
+
+---
+# GCN-LABEL: name: diffoporder_add_global_atomic_cmpswap
+# GFX9: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 1000, 0,
+# GFX9: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
+
+# GFX8: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
+# GFX8: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
+
+name: diffoporder_add_global_atomic_cmpswap
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ GLOBAL_ATOMIC_CMPSWAP %6:vreg_64, %0:vreg_64, 0, 0, implicit $exec
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ GLOBAL_ATOMIC_CMPSWAP %13:vreg_64, %0:vreg_64, 0, 0, implicit $exec
+...
+
+---
+# GCN-LABEL: name: diffoporder_add_flat_atomic_cmpswap
+# GFX9: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 1000, 0,
+# GFX9: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
+
+# GFX8: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
+# GFX8: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
+
+name: diffoporder_add_flat_atomic_cmpswap
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ FLAT_ATOMIC_CMPSWAP %6:vreg_64, %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr
+
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ FLAT_ATOMIC_CMPSWAP %13:vreg_64, %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr
+...
+
+---
+# GCN-LABEL: name: diffoporder_add_global_atomic_add
+# GFX9: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 1000, 0,
+# GFX9: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
+
+# GFX8: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
+# GFX8: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
+
+name: diffoporder_add_global_atomic_add
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ GLOBAL_ATOMIC_ADD %6:vreg_64, %0.sub0, 0, 0, implicit $exec
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ GLOBAL_ATOMIC_ADD %13:vreg_64, %0.sub0, 0, 0, implicit $exec
+...
+
+---
+# GCN-LABEL: name: diffoporder_add_flat_atomic_add
+# GFX9: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 1000, 0,
+# GFX9: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
+
+# GFX8: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
+# GFX8: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
+
+name: diffoporder_add_flat_atomic_add
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ FLAT_ATOMIC_ADD %6:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
+
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ FLAT_ATOMIC_ADD %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
+...
+
+---
+# GCN-LABEL: name: diffoporder_add_global_atomic_add_rtn
+# GFX9: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 1000, 0,
+# GFX9: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
+
+# GFX8: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
+# GFX8: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
+
+name: diffoporder_add_global_atomic_add_rtn
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ %14:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %6:vreg_64, %0.sub0, 0, 0, implicit $exec
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ %15:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec
+...
+
+---
+# GCN-LABEL: name: diffoporder_add_flat_atomic_add_rtn
+# GFX9: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 1000, 0,
+# GFX9: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
+
+# GFX8: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
+# GFX8: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
+
+name: diffoporder_add_flat_atomic_add_rtn
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ %14:vgpr_32 = FLAT_ATOMIC_ADD_RTN %6:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
+
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ %15:vgpr_32 = FLAT_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
+...
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll
index fc00937e6c8a..721114ece56d 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll
@@ -43,25 +43,6 @@ define i32 @test_tail_call(ptr addrspace(1) %out, ptr addrspace(1) %in) {
ret i32 %c
}
-declare void @external.varargs(i32, double, i64, ...)
-
-; GCN: error: <unknown>:0:0: in function test_call_varargs void (): unsupported call to variadic function external.varargs
-; R600: in function test_call_varargs{{.*}}: unsupported call to function external.varargs
-define void @test_call_varargs() {
- call void (i32, double, i64, ...) @external.varargs(i32 42, double 1.0, i64 12, i8 3, i16 1, i32 4, float 1.0, double 2.0)
- ret void
-}
-
-declare i32 @extern_variadic(...)
-
-; GCN: in function test_tail_call_bitcast_extern_variadic{{.*}}: unsupported required tail call to function extern_variadic
-; R600: in function test_tail_call_bitcast_extern_variadic{{.*}}: unsupported call to function extern_variadic
-define i32 @test_tail_call_bitcast_extern_variadic(<4 x float> %arg0, <4 x float> %arg1, i32 %arg2) {
- %add = fadd <4 x float> %arg0, %arg1
- %call = tail call i32 @extern_variadic(<4 x float> %add)
- ret i32 %call
-}
-
; R600: in function test_c_call{{.*}}: unsupported call to function defined_function
define amdgpu_ps i32 @test_c_call_from_shader() {
%call = call i32 @defined_function(i32 0)
diff --git a/llvm/test/CodeGen/ARM/neon_vabd.ll b/llvm/test/CodeGen/ARM/neon_vabd.ll
new file mode 100644
index 000000000000..14ad1a108a72
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/neon_vabd.ll
@@ -0,0 +1,890 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
+
+;
+; SABD
+;
+
+define <8 x i8> @sabd_8b(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: sabd_8b:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.s8 q8, d17, d16
+; CHECK-NEXT: vabs.s16 q8, q8
+; CHECK-NEXT: vmovn.i16 d16, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <8 x i8> %a to <8 x i16>
+ %b.sext = sext <8 x i8> %b to <8 x i16>
+ %sub = sub <8 x i16> %a.sext, %b.sext
+ %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+ %trunc = trunc <8 x i16> %abs to <8 x i8>
+ ret <8 x i8> %trunc
+}
+
+define <16 x i8> @sabd_16b(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: sabd_16b:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vmov d19, r0, r1
+; CHECK-NEXT: vsubl.s8 q10, d18, d17
+; CHECK-NEXT: vsubl.s8 q8, d19, d16
+; CHECK-NEXT: vabs.s16 q9, q10
+; CHECK-NEXT: vabs.s16 q8, q8
+; CHECK-NEXT: vmovn.i16 d19, q9
+; CHECK-NEXT: vmovn.i16 d18, q8
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <16 x i8> %a to <16 x i16>
+ %b.sext = sext <16 x i8> %b to <16 x i16>
+ %sub = sub <16 x i16> %a.sext, %b.sext
+ %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
+ %trunc = trunc <16 x i16> %abs to <16 x i8>
+ ret <16 x i8> %trunc
+}
+
+define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: sabd_4h:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.s16 q8, d17, d16
+; CHECK-NEXT: vabs.s32 q8, q8
+; CHECK-NEXT: vmovn.i32 d16, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <4 x i16> %a to <4 x i32>
+ %b.sext = sext <4 x i16> %b to <4 x i32>
+ %sub = sub <4 x i32> %a.sext, %b.sext
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ %trunc = trunc <4 x i32> %abs to <4 x i16>
+ ret <4 x i16> %trunc
+}
+
+define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: sabd_4h_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vshl.i16 d16, d16, #8
+; CHECK-NEXT: vshl.i16 d17, d17, #8
+; CHECK-NEXT: vshr.s16 d16, d16, #8
+; CHECK-NEXT: vshr.s16 d17, d17, #8
+; CHECK-NEXT: vsub.i16 d16, d17, d16
+; CHECK-NEXT: vabs.s16 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <4 x i8> %a to <4 x i16>
+ %b.sext = sext <4 x i8> %b to <4 x i16>
+ %sub = sub <4 x i16> %a.sext, %b.sext
+ %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
+ ret <4 x i16> %abs
+}
+
+define <8 x i16> @sabd_8h(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: sabd_8h:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vmov d19, r0, r1
+; CHECK-NEXT: vsubl.s16 q10, d18, d17
+; CHECK-NEXT: vsubl.s16 q8, d19, d16
+; CHECK-NEXT: vabs.s32 q9, q10
+; CHECK-NEXT: vabs.s32 q8, q8
+; CHECK-NEXT: vmovn.i32 d19, q9
+; CHECK-NEXT: vmovn.i32 d18, q8
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <8 x i16> %a to <8 x i32>
+ %b.sext = sext <8 x i16> %b to <8 x i32>
+ %sub = sub <8 x i32> %a.sext, %b.sext
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
+ %trunc = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %trunc
+}
+
+define <8 x i16> @sabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: sabd_8h_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.s8 q8, d17, d16
+; CHECK-NEXT: vabs.s16 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <8 x i8> %a to <8 x i16>
+ %b.sext = sext <8 x i8> %b to <8 x i16>
+ %sub = sub <8 x i16> %a.sext, %b.sext
+ %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+ ret <8 x i16> %abs
+}
+
+define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: sabd_2s:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.s32 q8, d17, d16
+; CHECK-NEXT: vshr.s64 q9, q8, #63
+; CHECK-NEXT: veor q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vmovn.i64 d16, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <2 x i32> %a to <2 x i64>
+ %b.sext = sext <2 x i32> %b to <2 x i64>
+ %sub = sub <2 x i64> %a.sext, %b.sext
+ %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+ %trunc = trunc <2 x i64> %abs to <2 x i32>
+ ret <2 x i32> %trunc
+}
+
+define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: sabd_2s_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vshl.i32 d16, d16, #16
+; CHECK-NEXT: vshl.i32 d17, d17, #16
+; CHECK-NEXT: vshr.s32 d16, d16, #16
+; CHECK-NEXT: vshr.s32 d17, d17, #16
+; CHECK-NEXT: vsub.i32 d16, d17, d16
+; CHECK-NEXT: vabs.s32 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <2 x i16> %a to <2 x i32>
+ %b.sext = sext <2 x i16> %b to <2 x i32>
+ %sub = sub <2 x i32> %a.sext, %b.sext
+ %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
+ ret <2 x i32> %abs
+}
+
+define <4 x i32> @sabd_4s(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: sabd_4s:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vmov d19, r0, r1
+; CHECK-NEXT: vsubl.s32 q10, d18, d17
+; CHECK-NEXT: vsubl.s32 q8, d19, d16
+; CHECK-NEXT: vshr.s64 q9, q10, #63
+; CHECK-NEXT: vshr.s64 q11, q8, #63
+; CHECK-NEXT: veor q10, q10, q9
+; CHECK-NEXT: veor q8, q8, q11
+; CHECK-NEXT: vsub.i64 q9, q10, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q11
+; CHECK-NEXT: vmovn.i64 d19, q9
+; CHECK-NEXT: vmovn.i64 d18, q8
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <4 x i32> %a to <4 x i64>
+ %b.sext = sext <4 x i32> %b to <4 x i64>
+ %sub = sub <4 x i64> %a.sext, %b.sext
+ %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
+ %trunc = trunc <4 x i64> %abs to <4 x i32>
+ ret <4 x i32> %trunc
+}
+
+define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: sabd_4s_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.s16 q8, d17, d16
+; CHECK-NEXT: vabs.s32 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <4 x i16> %a to <4 x i32>
+ %b.sext = sext <4 x i16> %b to <4 x i32>
+ %sub = sub <4 x i32> %a.sext, %b.sext
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ ret <4 x i32> %abs
+}
+
+define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: sabd_2d:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: add r12, sp, #24
+; CHECK-NEXT: asr r6, r3, #31
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov r12, lr, d17
+; CHECK-NEXT: vmov r7, r5, d16
+; CHECK-NEXT: subs r2, r2, r12
+; CHECK-NEXT: sbcs r3, r3, lr
+; CHECK-NEXT: sbcs r4, r6, lr, asr #31
+; CHECK-NEXT: sbc r6, r6, lr, asr #31
+; CHECK-NEXT: eor r2, r2, r6, asr #31
+; CHECK-NEXT: eor r3, r3, r6, asr #31
+; CHECK-NEXT: subs r2, r2, r6, asr #31
+; CHECK-NEXT: sbc r3, r3, r6, asr #31
+; CHECK-NEXT: subs r0, r0, r7
+; CHECK-NEXT: asr r6, r1, #31
+; CHECK-NEXT: sbcs r1, r1, r5
+; CHECK-NEXT: sbcs r7, r6, r5, asr #31
+; CHECK-NEXT: vmov.32 d17[0], r2
+; CHECK-NEXT: sbc r7, r6, r5, asr #31
+; CHECK-NEXT: eor r0, r0, r7, asr #31
+; CHECK-NEXT: subs r0, r0, r7, asr #31
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: eor r0, r1, r7, asr #31
+; CHECK-NEXT: sbc r0, r0, r7, asr #31
+; CHECK-NEXT: vmov.32 d17[1], r3
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <2 x i64> %a to <2 x i128>
+ %b.sext = sext <2 x i64> %b to <2 x i128>
+ %sub = sub <2 x i128> %a.sext, %b.sext
+ %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
+ %trunc = trunc <2 x i128> %abs to <2 x i64>
+ ret <2 x i64> %trunc
+}
+
+define <2 x i64> @sabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: sabd_2d_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.s32 q8, d17, d16
+; CHECK-NEXT: vshr.s64 q9, q8, #63
+; CHECK-NEXT: veor q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a.sext = sext <2 x i32> %a to <2 x i64>
+ %b.sext = sext <2 x i32> %b to <2 x i64>
+ %sub = sub <2 x i64> %a.sext, %b.sext
+ %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+ ret <2 x i64> %abs
+}
+
+;
+; UABD
+;
+
+define <8 x i8> @uabd_8b(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: uabd_8b:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vabdl.u8 q8, d17, d16
+; CHECK-NEXT: vmovn.i16 d16, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <8 x i8> %a to <8 x i16>
+ %b.zext = zext <8 x i8> %b to <8 x i16>
+ %sub = sub <8 x i16> %a.zext, %b.zext
+ %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+ %trunc = trunc <8 x i16> %abs to <8 x i8>
+ ret <8 x i8> %trunc
+}
+
+define <16 x i8> @uabd_16b(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: uabd_16b:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vmov d19, r0, r1
+; CHECK-NEXT: vabdl.u8 q10, d18, d17
+; CHECK-NEXT: vabdl.u8 q8, d19, d16
+; CHECK-NEXT: vmovn.i16 d19, q10
+; CHECK-NEXT: vmovn.i16 d18, q8
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <16 x i8> %a to <16 x i16>
+ %b.zext = zext <16 x i8> %b to <16 x i16>
+ %sub = sub <16 x i16> %a.zext, %b.zext
+ %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
+ %trunc = trunc <16 x i16> %abs to <16 x i8>
+ ret <16 x i8> %trunc
+}
+
+define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: uabd_4h:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vabdl.u16 q8, d17, d16
+; CHECK-NEXT: vmovn.i32 d16, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <4 x i16> %a to <4 x i32>
+ %b.zext = zext <4 x i16> %b to <4 x i32>
+ %sub = sub <4 x i32> %a.zext, %b.zext
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ %trunc = trunc <4 x i32> %abs to <4 x i16>
+ ret <4 x i16> %trunc
+}
+
+define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: uabd_4h_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vbic.i16 d16, #0xff00
+; CHECK-NEXT: vbic.i16 d17, #0xff00
+; CHECK-NEXT: vsub.i16 d16, d17, d16
+; CHECK-NEXT: vabs.s16 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <4 x i8> %a to <4 x i16>
+ %b.zext = zext <4 x i8> %b to <4 x i16>
+ %sub = sub <4 x i16> %a.zext, %b.zext
+ %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
+ ret <4 x i16> %abs
+}
+
+define <8 x i16> @uabd_8h(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: uabd_8h:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vmov d19, r0, r1
+; CHECK-NEXT: vabdl.u16 q10, d18, d17
+; CHECK-NEXT: vabdl.u16 q8, d19, d16
+; CHECK-NEXT: vmovn.i32 d19, q10
+; CHECK-NEXT: vmovn.i32 d18, q8
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <8 x i16> %a to <8 x i32>
+ %b.zext = zext <8 x i16> %b to <8 x i32>
+ %sub = sub <8 x i32> %a.zext, %b.zext
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
+ %trunc = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %trunc
+}
+
+define <8 x i16> @uabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: uabd_8h_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vabdl.u8 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <8 x i8> %a to <8 x i16>
+ %b.zext = zext <8 x i8> %b to <8 x i16>
+ %sub = sub <8 x i16> %a.zext, %b.zext
+ %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+ ret <8 x i16> %abs
+}
+
+define <2 x i32> @uabd_2s(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: uabd_2s:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.u32 q8, d17, d16
+; CHECK-NEXT: vshr.s64 q9, q8, #63
+; CHECK-NEXT: veor q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vmovn.i64 d16, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <2 x i32> %a to <2 x i64>
+ %b.zext = zext <2 x i32> %b to <2 x i64>
+ %sub = sub <2 x i64> %a.zext, %b.zext
+ %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+ %trunc = trunc <2 x i64> %abs to <2 x i32>
+ ret <2 x i32> %trunc
+}
+
+define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: uabd_2s_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.i32 d16, #0xffff
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d18, r0, r1
+; CHECK-NEXT: vand d17, d17, d16
+; CHECK-NEXT: vand d16, d18, d16
+; CHECK-NEXT: vsub.i32 d16, d16, d17
+; CHECK-NEXT: vabs.s32 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <2 x i16> %a to <2 x i32>
+ %b.zext = zext <2 x i16> %b to <2 x i32>
+ %sub = sub <2 x i32> %a.zext, %b.zext
+ %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
+ ret <2 x i32> %abs
+}
+
+define <4 x i32> @uabd_4s(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: uabd_4s:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vmov d19, r0, r1
+; CHECK-NEXT: vsubl.u32 q10, d18, d17
+; CHECK-NEXT: vsubl.u32 q8, d19, d16
+; CHECK-NEXT: vshr.s64 q9, q10, #63
+; CHECK-NEXT: vshr.s64 q11, q8, #63
+; CHECK-NEXT: veor q10, q10, q9
+; CHECK-NEXT: veor q8, q8, q11
+; CHECK-NEXT: vsub.i64 q9, q10, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q11
+; CHECK-NEXT: vmovn.i64 d19, q9
+; CHECK-NEXT: vmovn.i64 d18, q8
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <4 x i32> %a to <4 x i64>
+ %b.zext = zext <4 x i32> %b to <4 x i64>
+ %sub = sub <4 x i64> %a.zext, %b.zext
+ %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
+ %trunc = trunc <4 x i64> %abs to <4 x i32>
+ ret <4 x i32> %trunc
+}
+
+define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: uabd_4s_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vabdl.u16 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <4 x i16> %a to <4 x i32>
+ %b.zext = zext <4 x i16> %b to <4 x i32>
+ %sub = sub <4 x i32> %a.zext, %b.zext
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ ret <4 x i32> %abs
+}
+
+define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: uabd_2d:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: add r12, sp, #24
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: vmov r12, lr, d17
+; CHECK-NEXT: vmov r4, r7, d16
+; CHECK-NEXT: subs r2, r2, r12
+; CHECK-NEXT: sbcs r3, r3, lr
+; CHECK-NEXT: sbcs r5, r6, #0
+; CHECK-NEXT: sbc r5, r6, #0
+; CHECK-NEXT: eor r2, r2, r5, asr #31
+; CHECK-NEXT: eor r3, r3, r5, asr #31
+; CHECK-NEXT: subs r2, r2, r5, asr #31
+; CHECK-NEXT: sbc r3, r3, r5, asr #31
+; CHECK-NEXT: subs r0, r0, r4
+; CHECK-NEXT: sbcs r1, r1, r7
+; CHECK-NEXT: vmov.32 d17[0], r2
+; CHECK-NEXT: sbcs r7, r6, #0
+; CHECK-NEXT: sbc r7, r6, #0
+; CHECK-NEXT: eor r0, r0, r7, asr #31
+; CHECK-NEXT: subs r0, r0, r7, asr #31
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: eor r0, r1, r7, asr #31
+; CHECK-NEXT: sbc r0, r0, r7, asr #31
+; CHECK-NEXT: vmov.32 d17[1], r3
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <2 x i64> %a to <2 x i128>
+ %b.zext = zext <2 x i64> %b to <2 x i128>
+ %sub = sub <2 x i128> %a.zext, %b.zext
+ %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
+ %trunc = trunc <2 x i128> %abs to <2 x i64>
+ ret <2 x i64> %trunc
+}
+
+define <2 x i64> @uabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: uabd_2d_promoted_ops:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vsubl.u32 q8, d17, d16
+; CHECK-NEXT: vshr.s64 q9, q8, #63
+; CHECK-NEXT: veor q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a.zext = zext <2 x i32> %a to <2 x i64>
+ %b.zext = zext <2 x i32> %b to <2 x i64>
+ %sub = sub <2 x i64> %a.zext, %b.zext
+ %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+ ret <2 x i64> %abs
+}
+
+define <16 x i8> @uabd_v16i8_nuw(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: uabd_v16i8_nuw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i8 q8, q8, q9
+; CHECK-NEXT: vabs.s8 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nuw <16 x i8> %a, %b
+ %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
+ ret <16 x i8> %abs
+}
+
+define <8 x i16> @uabd_v8i16_nuw(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: uabd_v8i16_nuw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i16 q8, q8, q9
+; CHECK-NEXT: vabs.s16 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nuw <8 x i16> %a, %b
+ %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+ ret <8 x i16> %abs
+}
+
+define <4 x i32> @uabd_v4i32_nuw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: uabd_v4i32_nuw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i32 q8, q8, q9
+; CHECK-NEXT: vabs.s32 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nuw <4 x i32> %a, %b
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ ret <4 x i32> %abs
+}
+
+define <2 x i64> @uabd_v2i64_nuw(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: uabd_v2i64_nuw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vshr.s64 q9, q8, #63
+; CHECK-NEXT: veor q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nuw <2 x i64> %a, %b
+ %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+ ret <2 x i64> %abs
+}
+
+define <16 x i8> @sabd_v16i8_nsw(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: sabd_v16i8_nsw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i8 q8, q8, q9
+; CHECK-NEXT: vabs.s8 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nsw <16 x i8> %a, %b
+ %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
+ ret <16 x i8> %abs
+}
+
+define <8 x i16> @sabd_v8i16_nsw(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: sabd_v8i16_nsw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i16 q8, q8, q9
+; CHECK-NEXT: vabs.s16 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nsw <8 x i16> %a, %b
+ %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+ ret <8 x i16> %abs
+}
+
+define <4 x i32> @sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: sabd_v4i32_nsw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i32 q8, q8, q9
+; CHECK-NEXT: vabs.s32 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nsw <4 x i32> %a, %b
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ ret <4 x i32> %abs
+}
+
+define <2 x i64> @sabd_v2i64_nsw(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: sabd_v2i64_nsw:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vshr.s64 q9, q8, #63
+; CHECK-NEXT: veor q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %sub = sub nsw <2 x i64> %a, %b
+ %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+ ret <2 x i64> %abs
+}
+
+define <16 x i8> @smaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
+; CHECK-LABEL: smaxmin_v16i8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmin.s8 q10, q8, q9
+; CHECK-NEXT: vmax.s8 q8, q8, q9
+; CHECK-NEXT: vsub.i8 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %0, <16 x i8> %1)
+ %b = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %0, <16 x i8> %1)
+ %sub = sub <16 x i8> %a, %b
+ ret <16 x i8> %sub
+}
+
+define <8 x i16> @smaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
+; CHECK-LABEL: smaxmin_v8i16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmin.s16 q10, q8, q9
+; CHECK-NEXT: vmax.s16 q8, q8, q9
+; CHECK-NEXT: vsub.i16 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %0, <8 x i16> %1)
+ %b = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %0, <8 x i16> %1)
+ %sub = sub <8 x i16> %a, %b
+ ret <8 x i16> %sub
+}
+
+define <4 x i32> @smaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
+; CHECK-LABEL: smaxmin_v4i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmin.s32 q10, q8, q9
+; CHECK-NEXT: vmax.s32 q8, q8, q9
+; CHECK-NEXT: vsub.i32 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %0, <4 x i32> %1)
+ %b = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %0, <4 x i32> %1)
+ %sub = sub <4 x i32> %a, %b
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
+; CHECK-LABEL: smaxmin_v2i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: add r6, sp, #24
+; CHECK-NEXT: mov r8, #0
+; CHECK-NEXT: vld1.64 {d18, d19}, [r6]
+; CHECK-NEXT: vmov r7, r12, d19
+; CHECK-NEXT: vmov r4, lr, d18
+; CHECK-NEXT: subs r5, r2, r7
+; CHECK-NEXT: sbcs r5, r3, r12
+; CHECK-NEXT: mov r6, r7
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: movlt r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: movne r6, r2
+; CHECK-NEXT: mov r5, r12
+; CHECK-NEXT: vmov.32 d17[0], r6
+; CHECK-NEXT: movne r5, r3
+; CHECK-NEXT: mov r6, r4
+; CHECK-NEXT: vmov.32 d17[1], r5
+; CHECK-NEXT: subs r5, r4, r0
+; CHECK-NEXT: sbcs r5, lr, r1
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: movlt r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: vmov.32 d18[0], r6
+; CHECK-NEXT: subs r6, r7, r2
+; CHECK-NEXT: sbcs r6, r12, r3
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: movlt r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: movne r7, r2
+; CHECK-NEXT: subs r2, r0, r4
+; CHECK-NEXT: sbcs r2, r1, lr
+; CHECK-NEXT: vmov.32 d19[0], r7
+; CHECK-NEXT: movlt r8, #1
+; CHECK-NEXT: cmp r8, #0
+; CHECK-NEXT: movne r4, r0
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: vmov.32 d16[0], r4
+; CHECK-NEXT: movne r0, r1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: movne r12, r3
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: movne lr, r1
+; CHECK-NEXT: vmov.32 d19[1], r12
+; CHECK-NEXT: vmov.32 d18[1], lr
+; CHECK-NEXT: vsub.i64 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <2 x i64> @llvm.smax.v2i64(<2 x i64> %0, <2 x i64> %1)
+ %b = tail call <2 x i64> @llvm.smin.v2i64(<2 x i64> %0, <2 x i64> %1)
+ %sub = sub <2 x i64> %a, %b
+ ret <2 x i64> %sub
+}
+
+define <16 x i8> @umaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
+; CHECK-LABEL: umaxmin_v16i8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmin.u8 q10, q8, q9
+; CHECK-NEXT: vmax.u8 q8, q8, q9
+; CHECK-NEXT: vsub.i8 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
+ %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %0, <16 x i8> %1)
+ %sub = sub <16 x i8> %a, %b
+ ret <16 x i8> %sub
+}
+
+define <8 x i16> @umaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
+; CHECK-LABEL: umaxmin_v8i16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmin.u16 q10, q8, q9
+; CHECK-NEXT: vmax.u16 q8, q8, q9
+; CHECK-NEXT: vsub.i16 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> %0, <8 x i16> %1)
+ %b = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %0, <8 x i16> %1)
+ %sub = sub <8 x i16> %a, %b
+ ret <8 x i16> %sub
+}
+
+define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
+; CHECK-LABEL: umaxmin_v4i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmin.u32 q10, q8, q9
+; CHECK-NEXT: vmax.u32 q8, q8, q9
+; CHECK-NEXT: vsub.i32 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %0, <4 x i32> %1)
+ %b = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %0, <4 x i32> %1)
+ %sub = sub <4 x i32> %a, %b
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
+; CHECK-LABEL: umaxmin_v2i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vqsub.u64 q10, q8, q9
+; CHECK-NEXT: vqsub.u64 q9, q9, q8
+; CHECK-NEXT: vsub.i64 q10, q10, q8
+; CHECK-NEXT: vadd.i64 q8, q8, q9
+; CHECK-NEXT: vadd.i64 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <2 x i64> @llvm.umax.v2i64(<2 x i64> %0, <2 x i64> %1)
+ %b = tail call <2 x i64> @llvm.umin.v2i64(<2 x i64> %0, <2 x i64> %1)
+ %sub = sub <2 x i64> %a, %b
+ ret <2 x i64> %sub
+}
+
+define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) {
+; CHECK-LABEL: umaxmin_v16i8_com1:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: mov r0, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vmin.u8 q10, q9, q8
+; CHECK-NEXT: vmax.u8 q8, q8, q9
+; CHECK-NEXT: vsub.i8 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
+ %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> %0)
+ %sub = sub <16 x i8> %a, %b
+ ret <16 x i8> %sub
+}
diff --git a/llvm/test/CodeGen/ARM/vaba.ll b/llvm/test/CodeGen/ARM/vaba.ll
index e4a61ea7d91f..14419a345d82 100644
--- a/llvm/test/CodeGen/ARM/vaba.ll
+++ b/llvm/test/CodeGen/ARM/vaba.ll
@@ -1,8 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
define <8 x i8> @vabas8(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabas8:
-;CHECK: vaba.s8
+; CHECK-LABEL: vabas8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d18, [r0]
+; CHECK-NEXT: vaba.s8 d18, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = load <8 x i8>, ptr %C
@@ -12,8 +19,14 @@ define <8 x i8> @vabas8(ptr %A, ptr %B, ptr %C) nounwind {
}
define <4 x i16> @vabas16(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabas16:
-;CHECK: vaba.s16
+; CHECK-LABEL: vabas16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d18, [r0]
+; CHECK-NEXT: vaba.s16 d18, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = load <4 x i16>, ptr %C
@@ -23,8 +36,14 @@ define <4 x i16> @vabas16(ptr %A, ptr %B, ptr %C) nounwind {
}
define <2 x i32> @vabas32(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabas32:
-;CHECK: vaba.s32
+; CHECK-LABEL: vabas32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d18, [r0]
+; CHECK-NEXT: vaba.s32 d18, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = load <2 x i32>, ptr %C
@@ -34,8 +53,14 @@ define <2 x i32> @vabas32(ptr %A, ptr %B, ptr %C) nounwind {
}
define <8 x i8> @vabau8(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabau8:
-;CHECK: vaba.u8
+; CHECK-LABEL: vabau8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d18, [r0]
+; CHECK-NEXT: vaba.u8 d18, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = load <8 x i8>, ptr %C
@@ -45,8 +70,14 @@ define <8 x i8> @vabau8(ptr %A, ptr %B, ptr %C) nounwind {
}
define <4 x i16> @vabau16(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabau16:
-;CHECK: vaba.u16
+; CHECK-LABEL: vabau16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d18, [r0]
+; CHECK-NEXT: vaba.u16 d18, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = load <4 x i16>, ptr %C
@@ -56,8 +87,14 @@ define <4 x i16> @vabau16(ptr %A, ptr %B, ptr %C) nounwind {
}
define <2 x i32> @vabau32(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabau32:
-;CHECK: vaba.u32
+; CHECK-LABEL: vabau32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vldr d18, [r0]
+; CHECK-NEXT: vaba.u32 d18, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = load <2 x i32>, ptr %C
@@ -67,8 +104,15 @@ define <2 x i32> @vabau32(ptr %A, ptr %B, ptr %C) nounwind {
}
define <16 x i8> @vabaQs8(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabaQs8:
-;CHECK: vaba.s8
+; CHECK-LABEL: vabaQs8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT: vaba.s8 q10, q9, q8
+; CHECK-NEXT: vmov r0, r1, d20
+; CHECK-NEXT: vmov r2, r3, d21
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = load <16 x i8>, ptr %C
@@ -78,8 +122,15 @@ define <16 x i8> @vabaQs8(ptr %A, ptr %B, ptr %C) nounwind {
}
define <8 x i16> @vabaQs16(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabaQs16:
-;CHECK: vaba.s16
+; CHECK-LABEL: vabaQs16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT: vaba.s16 q10, q9, q8
+; CHECK-NEXT: vmov r0, r1, d20
+; CHECK-NEXT: vmov r2, r3, d21
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = load <8 x i16>, ptr %C
@@ -89,8 +140,15 @@ define <8 x i16> @vabaQs16(ptr %A, ptr %B, ptr %C) nounwind {
}
define <4 x i32> @vabaQs32(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabaQs32:
-;CHECK: vaba.s32
+; CHECK-LABEL: vabaQs32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT: vaba.s32 q10, q9, q8
+; CHECK-NEXT: vmov r0, r1, d20
+; CHECK-NEXT: vmov r2, r3, d21
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = load <4 x i32>, ptr %C
@@ -100,8 +158,15 @@ define <4 x i32> @vabaQs32(ptr %A, ptr %B, ptr %C) nounwind {
}
define <16 x i8> @vabaQu8(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabaQu8:
-;CHECK: vaba.u8
+; CHECK-LABEL: vabaQu8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT: vaba.u8 q10, q9, q8
+; CHECK-NEXT: vmov r0, r1, d20
+; CHECK-NEXT: vmov r2, r3, d21
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = load <16 x i8>, ptr %C
@@ -111,8 +176,15 @@ define <16 x i8> @vabaQu8(ptr %A, ptr %B, ptr %C) nounwind {
}
define <8 x i16> @vabaQu16(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabaQu16:
-;CHECK: vaba.u16
+; CHECK-LABEL: vabaQu16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT: vaba.u16 q10, q9, q8
+; CHECK-NEXT: vmov r0, r1, d20
+; CHECK-NEXT: vmov r2, r3, d21
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = load <8 x i16>, ptr %C
@@ -122,8 +194,15 @@ define <8 x i16> @vabaQu16(ptr %A, ptr %B, ptr %C) nounwind {
}
define <4 x i32> @vabaQu32(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabaQu32:
-;CHECK: vaba.u32
+; CHECK-LABEL: vabaQu32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT: vaba.u32 q10, q9, q8
+; CHECK-NEXT: vmov r0, r1, d20
+; CHECK-NEXT: vmov r2, r3, d21
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = load <4 x i32>, ptr %C
@@ -149,8 +228,15 @@ declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind read
declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @vabals8(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabals8:
-;CHECK: vabal.s8
+; CHECK-LABEL: vabals8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabal.s8 q9, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = load <8 x i8>, ptr %C
@@ -161,8 +247,15 @@ define <8 x i16> @vabals8(ptr %A, ptr %B, ptr %C) nounwind {
}
define <4 x i32> @vabals16(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabals16:
-;CHECK: vabal.s16
+; CHECK-LABEL: vabals16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabal.s16 q9, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = load <4 x i16>, ptr %C
@@ -173,8 +266,15 @@ define <4 x i32> @vabals16(ptr %A, ptr %B, ptr %C) nounwind {
}
define <2 x i64> @vabals32(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabals32:
-;CHECK: vabal.s32
+; CHECK-LABEL: vabals32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabal.s32 q9, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = load <2 x i32>, ptr %C
@@ -185,8 +285,15 @@ define <2 x i64> @vabals32(ptr %A, ptr %B, ptr %C) nounwind {
}
define <8 x i16> @vabalu8(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabalu8:
-;CHECK: vabal.u8
+; CHECK-LABEL: vabalu8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabal.u8 q9, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = load <8 x i8>, ptr %C
@@ -197,8 +304,15 @@ define <8 x i16> @vabalu8(ptr %A, ptr %B, ptr %C) nounwind {
}
define <4 x i32> @vabalu16(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabalu16:
-;CHECK: vabal.u16
+; CHECK-LABEL: vabalu16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabal.u16 q9, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = load <4 x i16>, ptr %C
@@ -209,8 +323,15 @@ define <4 x i32> @vabalu16(ptr %A, ptr %B, ptr %C) nounwind {
}
define <2 x i64> @vabalu32(ptr %A, ptr %B, ptr %C) nounwind {
-;CHECK-LABEL: vabalu32:
-;CHECK: vabal.u32
+; CHECK-LABEL: vabalu32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r2]
+; CHECK-NEXT: vldr d17, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabal.u32 q9, d17, d16
+; CHECK-NEXT: vmov r0, r1, d18
+; CHECK-NEXT: vmov r2, r3, d19
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = load <2 x i32>, ptr %C
diff --git a/llvm/test/CodeGen/ARM/vabd.ll b/llvm/test/CodeGen/ARM/vabd.ll
index eb5eed83d4ca..4184e9275a25 100644
--- a/llvm/test/CodeGen/ARM/vabd.ll
+++ b/llvm/test/CodeGen/ARM/vabd.ll
@@ -1,8 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
define <8 x i8> @vabds8(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabds8:
-;CHECK: vabd.s8
+; CHECK-LABEL: vabds8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.s8 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -10,8 +16,13 @@ define <8 x i8> @vabds8(ptr %A, ptr %B) nounwind {
}
define <4 x i16> @vabds16(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabds16:
-;CHECK: vabd.s16
+; CHECK-LABEL: vabds16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.s16 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -19,8 +30,13 @@ define <4 x i16> @vabds16(ptr %A, ptr %B) nounwind {
}
define <2 x i32> @vabds32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabds32:
-;CHECK: vabd.s32
+; CHECK-LABEL: vabds32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.s32 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -28,8 +44,13 @@ define <2 x i32> @vabds32(ptr %A, ptr %B) nounwind {
}
define <8 x i8> @vabdu8(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdu8:
-;CHECK: vabd.u8
+; CHECK-LABEL: vabdu8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.u8 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -37,8 +58,13 @@ define <8 x i8> @vabdu8(ptr %A, ptr %B) nounwind {
}
define <4 x i16> @vabdu16(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdu16:
-;CHECK: vabd.u16
+; CHECK-LABEL: vabdu16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.u16 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -46,8 +72,13 @@ define <4 x i16> @vabdu16(ptr %A, ptr %B) nounwind {
}
define <2 x i32> @vabdu32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdu32:
-;CHECK: vabd.u32
+; CHECK-LABEL: vabdu32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.u32 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -55,8 +86,13 @@ define <2 x i32> @vabdu32(ptr %A, ptr %B) nounwind {
}
define <2 x float> @vabdf32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdf32:
-;CHECK: vabd.f32
+; CHECK-LABEL: vabdf32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabd.f32 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x float>, ptr %A
%tmp2 = load <2 x float>, ptr %B
%tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
@@ -64,8 +100,14 @@ define <2 x float> @vabdf32(ptr %A, ptr %B) nounwind {
}
define <16 x i8> @vabdQs8(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQs8:
-;CHECK: vabd.s8
+; CHECK-LABEL: vabdQs8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.s8 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -73,8 +115,14 @@ define <16 x i8> @vabdQs8(ptr %A, ptr %B) nounwind {
}
define <8 x i16> @vabdQs16(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQs16:
-;CHECK: vabd.s16
+; CHECK-LABEL: vabdQs16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.s16 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -82,8 +130,14 @@ define <8 x i16> @vabdQs16(ptr %A, ptr %B) nounwind {
}
define <4 x i32> @vabdQs32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQs32:
-;CHECK: vabd.s32
+; CHECK-LABEL: vabdQs32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.s32 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -91,8 +145,14 @@ define <4 x i32> @vabdQs32(ptr %A, ptr %B) nounwind {
}
define <16 x i8> @vabdQu8(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQu8:
-;CHECK: vabd.u8
+; CHECK-LABEL: vabdQu8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.u8 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -100,8 +160,14 @@ define <16 x i8> @vabdQu8(ptr %A, ptr %B) nounwind {
}
define <8 x i16> @vabdQu16(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQu16:
-;CHECK: vabd.u16
+; CHECK-LABEL: vabdQu16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.u16 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -109,8 +175,14 @@ define <8 x i16> @vabdQu16(ptr %A, ptr %B) nounwind {
}
define <4 x i32> @vabdQu32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQu32:
-;CHECK: vabd.u32
+; CHECK-LABEL: vabdQu32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.u32 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -118,8 +190,14 @@ define <4 x i32> @vabdQu32(ptr %A, ptr %B) nounwind {
}
define <4 x float> @vabdQf32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdQf32:
-;CHECK: vabd.f32
+; CHECK-LABEL: vabdQf32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vabd.f32 q8, q9, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x float>, ptr %A
%tmp2 = load <4 x float>, ptr %B
%tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
@@ -147,8 +225,14 @@ declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind read
declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
define <8 x i16> @vabdls8(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdls8:
-;CHECK: vabdl.s8
+; CHECK-LABEL: vabdls8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabdl.s8 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -157,8 +241,14 @@ define <8 x i16> @vabdls8(ptr %A, ptr %B) nounwind {
}
define <4 x i32> @vabdls16(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdls16:
-;CHECK: vabdl.s16
+; CHECK-LABEL: vabdls16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabdl.s16 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -167,8 +257,14 @@ define <4 x i32> @vabdls16(ptr %A, ptr %B) nounwind {
}
define <2 x i64> @vabdls32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdls32:
-;CHECK: vabdl.s32
+; CHECK-LABEL: vabdls32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabdl.s32 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -177,8 +273,14 @@ define <2 x i64> @vabdls32(ptr %A, ptr %B) nounwind {
}
define <8 x i16> @vabdlu8(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdlu8:
-;CHECK: vabdl.u8
+; CHECK-LABEL: vabdlu8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabdl.u8 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -187,8 +289,14 @@ define <8 x i16> @vabdlu8(ptr %A, ptr %B) nounwind {
}
define <4 x i32> @vabdlu16(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdlu16:
-;CHECK: vabdl.u16
+; CHECK-LABEL: vabdlu16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabdl.u16 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -197,8 +305,14 @@ define <4 x i32> @vabdlu16(ptr %A, ptr %B) nounwind {
}
define <2 x i64> @vabdlu32(ptr %A, ptr %B) nounwind {
-;CHECK-LABEL: vabdlu32:
-;CHECK: vabdl.u32
+; CHECK-LABEL: vabdlu32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0]
+; CHECK-NEXT: vabdl.u32 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
diff --git a/llvm/test/CodeGen/LoongArch/emutls.ll b/llvm/test/CodeGen/LoongArch/emutls.ll
index 19b6b887f6f0..88007a86e795 100644
--- a/llvm/test/CodeGen/LoongArch/emutls.ll
+++ b/llvm/test/CodeGen/LoongArch/emutls.ll
@@ -1,9 +1,9 @@
; RUN: not llc --mtriple=loongarch64 -emulated-tls -mattr=+d \
; RUN: -relocation-model=pic < %s 2>&1 | FileCheck %s
-; CHECK: error: the emulated TLS is prohibited.
-; CHECK: error: the emulated TLS is prohibited.
-; CHECK: error: the emulated TLS is prohibited.
+; CHECK: error: the emulated TLS is prohibited
+; CHECK: error: the emulated TLS is prohibited
+; CHECK: error: the emulated TLS is prohibited
@external_x = external thread_local global i32, align 8
@y = thread_local global i8 7, align 2
diff --git a/llvm/test/CodeGen/LoongArch/fp16-promote.ll b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
new file mode 100644
index 000000000000..75f920b43a06
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
@@ -0,0 +1,326 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
+
+define void @test_load_store(ptr %p, ptr %q) nounwind {
+; LA32-LABEL: test_load_store:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.h $a0, $a0, 0
+; LA32-NEXT: st.h $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_load_store:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.h $a0, $a0, 0
+; LA64-NEXT: st.h $a0, $a1, 0
+; LA64-NEXT: ret
+ %a = load half, ptr %p
+ store half %a, ptr %q
+ ret void
+}
+
+define float @test_fpextend_float(ptr %p) nounwind {
+; LA32-LABEL: test_fpextend_float:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: b %plt(__gnu_h2f_ieee)
+;
+; LA64-LABEL: test_fpextend_float:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: b %plt(__gnu_h2f_ieee)
+ %a = load half, ptr %p
+ %r = fpext half %a to float
+ ret float %r
+}
+
+define double @test_fpextend_double(ptr %p) nounwind {
+; LA32-LABEL: test_fpextend_double:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fcvt.d.s $fa0, $fa0
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fpextend_double:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fcvt.d.s $fa0, $fa0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %a = load half, ptr %p
+ %r = fpext half %a to double
+ ret double %r
+}
+
+define void @test_fptrunc_float(float %f, ptr %p) nounwind {
+; LA32-LABEL: test_fptrunc_float:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: st.h $a0, $fp, 0
+; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fptrunc_float:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: st.h $a0, $fp, 0
+; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %a = fptrunc float %f to half
+ store half %a, ptr %p
+ ret void
+}
+
+define void @test_fptrunc_double(double %d, ptr %p) nounwind {
+; LA32-LABEL: test_fptrunc_double:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: bl %plt(__truncdfhf2)
+; LA32-NEXT: st.h $a0, $fp, 0
+; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fptrunc_double:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: bl %plt(__truncdfhf2)
+; LA64-NEXT: st.h $a0, $fp, 0
+; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %a = fptrunc double %d to half
+ store half %a, ptr %p
+ ret void
+}
+
+define half @test_fadd_reg(half %a, half %b) nounwind {
+; LA32-LABEL: test_fadd_reg:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: fmov.s $fa0, $fa1
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fadd.s $fa0, $fa0, $fs1
+; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fadd_reg:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: fmov.s $fa0, $fa1
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fmov.s $fs1, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fadd.s $fa0, $fa0, $fs1
+; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+ %r = fadd half %a, %b
+ ret half %r
+}
+
+define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
+; LA32-LABEL: test_fadd_mem:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: move $fp, $a1
+; LA32-NEXT: move $s0, $a0
+; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: ld.hu $a0, $fp, 0
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fadd.s $fa0, $fs0, $fa0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: st.h $a0, $s0, 0
+; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fadd_mem:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT: move $fp, $a1
+; LA64-NEXT: move $s0, $a0
+; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: ld.hu $a0, $fp, 0
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fadd.s $fa0, $fs0, $fa0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: st.h $a0, $s0, 0
+; LA64-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+ %a = load half, ptr %p
+ %b = load half, ptr %q
+ %r = fadd half %a, %b
+ store half %r, ptr %p
+ ret void
+}
+
+define half @test_fmul_reg(half %a, half %b) nounwind {
+; LA32-LABEL: test_fmul_reg:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: fmov.s $fa0, $fa1
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fmul.s $fa0, $fa0, $fs1
+; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fmul_reg:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: fmov.s $fa0, $fa1
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fmov.s $fs1, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fmul.s $fa0, $fa0, $fs1
+; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+ %r = fmul half %a, %b
+ ret half %r
+}
+
+define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
+; LA32-LABEL: test_fmul_mem:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: move $fp, $a1
+; LA32-NEXT: move $s0, $a0
+; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: ld.hu $a0, $fp, 0
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fmul.s $fa0, $fs0, $fa0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: st.h $a0, $s0, 0
+; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_fmul_mem:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT: move $fp, $a1
+; LA64-NEXT: move $s0, $a0
+; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: ld.hu $a0, $fp, 0
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fmul.s $fa0, $fs0, $fa0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: st.h $a0, $s0, 0
+; LA64-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+ %a = load half, ptr %p
+ %b = load half, ptr %q
+ %r = fmul half %a, %b
+ store half %r, ptr %p
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/intr-range.ll b/llvm/test/CodeGen/NVPTX/intr-range.ll
new file mode 100644
index 000000000000..2f3e08a039f5
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/intr-range.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 5
+; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -mcpu=sm_20 -passes=nvvm-intr-range | FileCheck %s
+
+define i32 @test_maxntid() {
+; CHECK-LABEL: define i32 @test_maxntid(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 96) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 96) i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+; CHECK-NEXT: [[TMP11:%.*]] = call range(i32 1, 97) i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 1, 97) i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 1, 65) i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP10]], [[TMP6]]
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+ %3 = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+ %4 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ %5 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+ %6 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+ %7 = add i32 %1, %2
+ %8 = add i32 %7, %3
+ %9 = add i32 %8, %4
+ %10 = add i32 %9, %5
+ %11 = add i32 %10, %6
+ ret i32 %11
+}
+
+define i32 @test_reqntid() {
+; CHECK-LABEL: define i32 @test_reqntid(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 20) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 20) i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 20) i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 1, 21) i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 1, 21) i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 1, 21) i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP1]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[TMP6]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+ %3 = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+ %4 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ %5 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+ %6 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+ %7 = add i32 %1, %2
+ %8 = add i32 %7, %3
+ %9 = add i32 %8, %4
+ %10 = add i32 %9, %5
+ %11 = add i32 %10, %6
+ ret i32 %5
+}
+
+;; A case like this could occur if a function with the sreg intrinsic was
+;; inlined into a kernel where the tid metadata is present, ensure the range is
+;; updated.
+define i32 @test_inlined() {
+; CHECK-LABEL: define i32 @test_inlined(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 4) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ ret i32 %1
+}
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+
+!nvvm.annotations = !{!0, !1, !2}
+!0 = !{ptr @test_maxntid, !"kernel", i32 1, !"maxntidx", i32 32, !"maxntidz", i32 3}
+!1 = !{ptr @test_reqntid, !"kernel", i32 1, !"reqntidx", i32 20}
+!2 = !{ptr @test_inlined, !"kernel", i32 1, !"maxntidx", i32 4}
diff --git a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
index 3930e6d77418..85f7817f08dc 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
@@ -1,21 +1,13 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-intr-range \
-; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s
-; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-intr-range \
-; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s
-; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
-; RUN: -passes=nvvm-intr-range -nvvm-intr-range-sm=30 \
-; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s
-; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
-; RUN: -passes=nvvm-intr-range -nvvm-intr-range-sm=30 \
-; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s
+; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
define ptx_device i32 @test_tid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
+; RANGE: call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
ret i32 %x
@@ -23,7 +15,7 @@ define ptx_device i32 @test_tid_x() {
define ptx_device i32 @test_tid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.y(), !range ![[BLK_IDX_XY]]
+; RANGE: call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.y()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
ret i32 %x
@@ -31,7 +23,7 @@ define ptx_device i32 @test_tid_y() {
define ptx_device i32 @test_tid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
+; RANGE: call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.z()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
ret i32 %x
@@ -46,7 +38,7 @@ define ptx_device i32 @test_tid_w() {
define ptx_device i32 @test_ntid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
+; RANGE: call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
ret i32 %x
@@ -54,7 +46,7 @@ define ptx_device i32 @test_ntid_x() {
define ptx_device i32 @test_ntid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y(), !range ![[BLK_SIZE_XY]]
+; RANGE: call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
ret i32 %x
@@ -62,7 +54,7 @@ define ptx_device i32 @test_ntid_y() {
define ptx_device i32 @test_ntid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
+; RANGE: call range(i32 1, 65) i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
ret i32 %x
@@ -77,7 +69,7 @@ define ptx_device i32 @test_ntid_w() {
define ptx_device i32 @test_laneid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.laneid(), !range ![[LANEID:[0-9]+]]
+; RANGE: call range(i32 0, 32) i32 @llvm.nvvm.read.ptx.sreg.laneid()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
ret i32 %x
@@ -85,7 +77,7 @@ define ptx_device i32 @test_laneid() {
define ptx_device i32 @test_warpsize() {
; CHECK: mov.u32 %r{{[0-9]+}}, WARP_SZ;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.warpsize(), !range ![[WARPSIZE:[0-9]+]]
+; RANGE: call range(i32 32, 33) i32 @llvm.nvvm.read.ptx.sreg.warpsize()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
ret i32 %x
@@ -107,7 +99,7 @@ define ptx_device i32 @test_nwarpid() {
define ptx_device i32 @test_ctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
+; RANGE: call range(i32 0, 65535) i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
ret i32 %x
@@ -115,7 +107,7 @@ define ptx_device i32 @test_ctaid_y() {
define ptx_device i32 @test_ctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z(), !range ![[GRID_IDX_YZ]]
+; RANGE: call range(i32 0, 65535) i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
ret i32 %x
@@ -123,8 +115,7 @@ define ptx_device i32 @test_ctaid_z() {
define ptx_device i32 @test_ctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
-; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
-; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_YZ]]
+; RANGE: call range(i32 0, 2147483647) i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
ret i32 %x
@@ -139,7 +130,7 @@ define ptx_device i32 @test_ctaid_w() {
define ptx_device i32 @test_nctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
+; RANGE: call range(i32 1, 65536) i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
ret i32 %x
@@ -147,7 +138,7 @@ define ptx_device i32 @test_nctaid_y() {
define ptx_device i32 @test_nctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
-; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z(), !range ![[GRID_SIZE_YZ]]
+; RANGE: call range(i32 1, 65536) i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
ret i32 %x
@@ -155,8 +146,7 @@ define ptx_device i32 @test_nctaid_z() {
define ptx_device i32 @test_nctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
-; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
-; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_YZ]]
+; RANGE: call range(i32 1, -2147483648) i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
; CHECK: ret;
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
ret i32 %x
@@ -327,14 +317,3 @@ declare void @llvm.nvvm.bar.sync(i32 %i)
!0 = !{i32 0, i32 19}
; RANGE-DAG: ![[ALREADY]] = !{i32 0, i32 19}
-; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
-; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
-; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}
-; RANGE-DAG: ![[BLK_SIZE_XY]] = !{i32 1, i32 1025}
-; RANGE-DAG: ![[BLK_SIZE_Z]] = !{i32 1, i32 65}
-; RANGE-DAG: ![[LANEID]] = !{i32 0, i32 32}
-; RANGE-DAG: ![[WARPSIZE]] = !{i32 32, i32 33}
-; RANGE_30-DAG: ![[GRID_IDX_X]] = !{i32 0, i32 2147483647}
-; RANGE-DAG: ![[GRID_IDX_YZ]] = !{i32 0, i32 65535}
-; RANGE_30-DAG: ![[GRID_SIZE_X]] = !{i32 1, i32 -2147483648}
-; RANGE-DAG: ![[GRID_SIZE_YZ]] = !{i32 1, i32 65536}
diff --git a/llvm/test/CodeGen/PowerPC/toc-data-common.ll b/llvm/test/CodeGen/PowerPC/toc-data-common.ll
index 7747f2eecc93..3b7ca44f5bbb 100644
--- a/llvm/test/CodeGen/PowerPC/toc-data-common.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-data-common.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=lwz --check-prefix=CHECK
-; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=ld --check-prefix=CHECK
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-64
; RUN: llc -filetype=obj -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s -o %t32.o
; RUN: llvm-objdump -t --symbol-description %t32.o | FileCheck %s --check-prefix=OBJ32
@@ -15,16 +15,28 @@
define void @set(i32 noundef %_a) {
; CHECK-LABEL: set:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 4, a2[TD](2)
-; CHECK-NEXT: la 5, a1[TD](2)
-; CHECK-NEXT: stw 3, 0(4)
-; CHECK-NEXT: [[INSTR]] 4, L..C0(2) # @a4
-; CHECK-NEXT: stw 3, 0(5)
-; CHECK-NEXT: [[INSTR]] 5, L..C1(2) # @a3
-; CHECK-NEXT: stw 3, 0(4)
-; CHECK-NEXT: stw 3, 0(5)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: la 4, a2[TD](2)
+; CHECK-NEXT: lwz 5, L..C0(2) # @a4
+; CHECK-NEXT: stw 3, 0(4)
+; CHECK-NEXT: la 4, a1[TD](2)
+; CHECK-NEXT: stw 3, 0(4)
+; CHECK-NEXT: lwz 4, L..C1(2) # @a3
+; CHECK-NEXT: stw 3, 0(5)
+; CHECK-NEXT: stw 3, 0(4)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: set:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: la 4, a2[TD](2)
+; CHECK-64-NEXT: ld 5, L..C0(2) # @a4
+; CHECK-64-NEXT: stw 3, 0(4)
+; CHECK-64-NEXT: la 4, a1[TD](2)
+; CHECK-64-NEXT: stw 3, 0(4)
+; CHECK-64-NEXT: ld 4, L..C1(2) # @a3
+; CHECK-64-NEXT: stw 3, 0(5)
+; CHECK-64-NEXT: stw 3, 0(4)
+; CHECK-64-NEXT: blr
entry:
store i32 %_a, ptr @a2, align 4
store i32 %_a, ptr @a1, align 4
@@ -35,10 +47,16 @@ ret void
define i32 @get1() {
; CHECK-LABEL: get1:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 3, a2[TD](2)
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: la 3, a2[TD](2)
+; CHECK-NEXT: lwz 3, 0(3)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: get1:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: la 3, a2[TD](2)
+; CHECK-64-NEXT: lwz 3, 0(3)
+; CHECK-64-NEXT: blr
entry:
%0 = load i32, ptr @a2, align 4
ret i32 %0
@@ -46,10 +64,16 @@ ret i32 %0
define i32 @get2() {
; CHECK-LABEL: get2:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 3, a1[TD](2)
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: la 3, a1[TD](2)
+; CHECK-NEXT: lwz 3, 0(3)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: get2:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: la 3, a1[TD](2)
+; CHECK-64-NEXT: lwz 3, 0(3)
+; CHECK-64-NEXT: blr
entry:
%0 = load i32, ptr @a1, align 4
ret i32 %0
@@ -57,10 +81,16 @@ ret i32 %0
define i32 @get3() {
; CHECK-LABEL: get3:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: [[INSTR]] 3, L..C0(2) # @a4
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lwz 3, L..C0(2) # @a4
+; CHECK-NEXT: lwz 3, 0(3)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: get3:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: ld 3, L..C0(2) # @a4
+; CHECK-64-NEXT: lwz 3, 0(3)
+; CHECK-64-NEXT: blr
entry:
%0 = load i32, ptr @a4, align 4
ret i32 %0
@@ -68,10 +98,16 @@ ret i32 %0
define i32 @get4() {
; CHECK-LABEL: get4:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: [[INSTR]] 3, L..C1(2) # @a3
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lwz 3, L..C1(2) # @a3
+; CHECK-NEXT: lwz 3, 0(3)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: get4:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: ld 3, L..C1(2) # @a3
+; CHECK-64-NEXT: lwz 3, 0(3)
+; CHECK-64-NEXT: blr
entry:
%0 = load i32, ptr @a3, align 4
ret i32 %0
@@ -79,36 +115,56 @@ ret i32 %0
define nonnull ptr @escape1() {
; CHECK-LABEL: escape1:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 3, a2[TD](2)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: la 3, a2[TD](2)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: escape1:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: la 3, a2[TD](2)
+; CHECK-64-NEXT: blr
entry:
ret ptr @a2
}
define nonnull ptr @escape2() {
; CHECK-LABEL: escape2:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: la 3, a1[TD](2)
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: la 3, a1[TD](2)
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: escape2:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: la 3, a1[TD](2)
+; CHECK-64-NEXT: blr
entry:
ret ptr @a1
}
define nonnull ptr @escape3() {
; CHECK-LABEL: escape3:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: [[INSTR]] 3, L..C0(2) # @a4
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lwz 3, L..C0(2) # @a4
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: escape3:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: ld 3, L..C0(2) # @a4
+; CHECK-64-NEXT: blr
entry:
ret ptr @a4
}
define nonnull ptr @escape4() {
; CHECK-LABEL: escape4:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: [[INSTR]] 3, L..C1(2) # @a3
-; CHECK-NEXT: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lwz 3, L..C1(2) # @a3
+; CHECK-NEXT: blr
+;
+; CHECK-64-LABEL: escape4:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: ld 3, L..C1(2) # @a3
+; CHECK-64-NEXT: blr
entry:
ret ptr @a3
}
diff --git a/llvm/test/CodeGen/PowerPC/toc-data.ll b/llvm/test/CodeGen/PowerPC/toc-data.ll
index 12286657488d..ee1dde190bb2 100644
--- a/llvm/test/CodeGen/PowerPC/toc-data.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-data.ll
@@ -36,7 +36,7 @@ define dso_local void @write_int(i32 signext %in) {
ret void
}
; CHECK32: name: write_int
-; CHECK32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @i, $r2
+; CHECK32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc $r2, @i
; CHECK32-NEXT: STW %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i)
; TEST32: .write_int:
@@ -44,12 +44,12 @@ define dso_local void @write_int(i32 signext %in) {
; TEST32-NEXT: stw 3, 0(4)
; CHECK64: name: write_int
-; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @i, $x2
+; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 $x2, @i
; CHECK64-NEXT: STW8 %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i)
; CHECK64-NOOPT: name: write_int
; CHECK64-NOOPT: %[[SUBREG:[0-9]+]]:gprc = COPY %{{[0-9]}}.sub_32
-; CHECK64-NOOPT: %[[ADDR:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @i, $x2
+; CHECK64-NOOPT: %[[ADDR:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 $x2, @i
; CHECK64-NOOPT: STW %[[SUBREG]], 0, %[[ADDR]]
; TEST64: .write_int:
@@ -128,7 +128,7 @@ define dso_local float @read_float() {
ret float %0
}
; CHECK32: name: read_float
-; CHECK32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @f, $r2
+; CHECK32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc $r2, @f
; CHECK32: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f)
; TEST32: .read_float:
@@ -136,11 +136,11 @@ define dso_local float @read_float() {
; TEST32-NEXT: lfs 1, 0(3)
; CHECK64: name: read_float
-; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2
+; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 $x2, @f
; CHECK64: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f)
; CHECK64-NOOPT: name: read_float
-; CHECK64-NOOPT: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2
+; CHECK64-NOOPT: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 $x2, @f
; CHECK64-NOOPT: %{{[0-9]+}}:f4rc = LFS 0, %[[SCRATCH]]
; TEST64: .read_float:
@@ -217,18 +217,18 @@ define dso_local nonnull ptr @addr() {
ret ptr @i
}
; CHECK32: name: addr
-; CHECK32: %[[SCRATCH:[0-9]+]]:gprc = ADDItoc @i, $r2
+; CHECK32: %[[SCRATCH:[0-9]+]]:gprc = ADDItoc $r2, @i
; CHECK32-NEXT: $r3 = COPY %[[SCRATCH]]
; TEST32: .addr
; TEST32: la 3, i[TD](2)
; CHECK64: name: addr
-; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc = ADDItoc8 @i, $x2
+; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc = ADDItoc8 $x2, @i
; CHECK64-NEXT: $x3 = COPY %[[SCRATCH]]
; CHECK64-NOOPT: name: addr
-; CHECK64-NOOPT: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @i, $x2
+; CHECK64-NOOPT: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 $x2, @i
; CHECK64-NOOPT: $x3 = COPY %[[SCRATCH]]
; TEST64: .addr
diff --git a/llvm/test/CodeGen/PowerPC/tocdata-firm-alignment.ll b/llvm/test/CodeGen/PowerPC/tocdata-firm-alignment.ll
new file mode 100644
index 000000000000..c982713d4f8d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/tocdata-firm-alignment.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -passes='default<O3>' < %s | FileCheck %s
+
+target datalayout = "E-m:a-p:32:32-Fi32-i64:64-n32"
+target triple = "powerpc-ibm-aix7.2.0.0"
+
+%struct.widget = type { i8, i8, i8 }
+
+; CHECK: @global = {{.*}}constant %struct.widget { i8 4, i8 0, i8 0 }, align 8 #0
+@global = constant %struct.widget { i8 4, i8 0, i8 0 }, align 4 #0
+
+define void @baz() #1 {
+bb:
+ call void @snork(ptr @global)
+ ret void
+}
+
+define void @snork(ptr byval(%struct.widget) %arg) #1 {
+bb:
+ %load = load volatile ptr, ptr null, align 4
+ ret void
+}
+
+attributes #0 = { "toc-data" }
+attributes #1 = { "target-cpu"="pwr7" "target-features"="+altivec,+bpermd,+extdiv,+isa-v206-instructions,+vsx,-aix-shared-lib-tls-model-opt,-aix-small-local-dynamic-tls,-aix-small-local-exec-tls,-crbits,-crypto,-direct-move,-htm,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe" }
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/merge-unmerge-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/merge-unmerge-rv32.mir
index 2e4a39c46811..46a7df449558 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/merge-unmerge-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/merge-unmerge-rv32.mir
@@ -68,12 +68,14 @@ body: |
; RV32: liveins: $x10
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
- ; RV32-NEXT: $x10 = COPY [[COPY]](s32)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; RV32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; RV32-NEXT: $x10 = COPY [[AND]](s32)
; RV32-NEXT: PseudoRET implicit $x10
%0:_(s32) = COPY $x10
- %1:_(s64) = G_ZEXT %0(s32)
- %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64)
- $x10 = COPY %2(s32)
+ %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0(s32)
+ %4:_(s32) = G_ZEXT %2(s16)
+ $x10 = COPY %4(s32)
PseudoRET implicit $x10
...
---
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
index 7a96aad31f08..d945cf561698 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
define <2 x half> @select_v2f16(i1 zeroext %c, <2 x half> %a, <2 x half> %b) {
@@ -343,123 +343,3 @@ define <16 x double> @selectcc_v16f64(double %a, double %b, <16 x double> %c, <1
%v = select i1 %cmp, <16 x double> %c, <16 x double> %d
ret <16 x double> %v
}
-
-define <2 x bfloat> @select_v2bf16(i1 zeroext %c, <2 x bfloat> %a, <2 x bfloat> %b) {
-; CHECK-LABEL: select_v2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %v = select i1 %c, <2 x bfloat> %a, <2 x bfloat> %b
- ret <2 x bfloat> %v
-}
-
-define <2 x bfloat> @selectcc_v2bf16(bfloat %a, bfloat %b, <2 x bfloat> %c, <2 x bfloat> %d) {
-; CHECK-LABEL: selectcc_v2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT: feq.s a0, fa4, fa5
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %cmp = fcmp oeq bfloat %a, %b
- %v = select i1 %cmp, <2 x bfloat> %c, <2 x bfloat> %d
- ret <2 x bfloat> %v
-}
-
-define <4 x bfloat> @select_v4bf16(i1 zeroext %c, <4 x bfloat> %a, <4 x bfloat> %b) {
-; CHECK-LABEL: select_v4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %v = select i1 %c, <4 x bfloat> %a, <4 x bfloat> %b
- ret <4 x bfloat> %v
-}
-
-define <4 x bfloat> @selectcc_v4bf16(bfloat %a, bfloat %b, <4 x bfloat> %c, <4 x bfloat> %d) {
-; CHECK-LABEL: selectcc_v4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT: feq.s a0, fa4, fa5
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %cmp = fcmp oeq bfloat %a, %b
- %v = select i1 %cmp, <4 x bfloat> %c, <4 x bfloat> %d
- ret <4 x bfloat> %v
-}
-
-define <8 x bfloat> @select_v8bf16(i1 zeroext %c, <8 x bfloat> %a, <8 x bfloat> %b) {
-; CHECK-LABEL: select_v8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %v = select i1 %c, <8 x bfloat> %a, <8 x bfloat> %b
- ret <8 x bfloat> %v
-}
-
-define <8 x bfloat> @selectcc_v8bf16(bfloat %a, bfloat %b, <8 x bfloat> %c, <8 x bfloat> %d) {
-; CHECK-LABEL: selectcc_v8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT: feq.s a0, fa4, fa5
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %cmp = fcmp oeq bfloat %a, %b
- %v = select i1 %cmp, <8 x bfloat> %c, <8 x bfloat> %d
- ret <8 x bfloat> %v
-}
-
-define <16 x bfloat> @select_v16bf16(i1 zeroext %c, <16 x bfloat> %a, <16 x bfloat> %b) {
-; CHECK-LABEL: select_v16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vmsne.vi v0, v12, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT: ret
- %v = select i1 %c, <16 x bfloat> %a, <16 x bfloat> %b
- ret <16 x bfloat> %v
-}
-
-define <16 x bfloat> @selectcc_v16bf16(bfloat %a, bfloat %b, <16 x bfloat> %c, <16 x bfloat> %d) {
-; CHECK-LABEL: selectcc_v16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT: feq.s a0, fa4, fa5
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vmsne.vi v0, v12, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT: ret
- %cmp = fcmp oeq bfloat %a, %b
- %v = select i1 %cmp, <16 x bfloat> %c, <16 x bfloat> %d
- ret <16 x bfloat> %v
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index d360c3f635b5..9f0561b394b8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN
declare <4 x i1> @llvm.vp.merge.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
@@ -1240,139 +1240,3 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1>
%v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
ret <32 x double> %v
}
-
-declare <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1>, <2 x bfloat>, <2 x bfloat>, i32)
-
-define <2 x bfloat> @vpmerge_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpmerge_vv_v2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
-; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
- %v = call <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1> %m, <2 x bfloat> %va, <2 x bfloat> %vb, i32 %evl)
- ret <2 x bfloat> %v
-}
-
-define <2 x bfloat> @vpmerge_vf_v2bf16(bfloat %a, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
-; ZVFH-LABEL: vpmerge_vf_v2bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
-; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vpmerge_vf_v2bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
-; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
-; ZVFHMIN-NEXT: ret
- %elt.head = insertelement <2 x bfloat> poison, bfloat %a, i32 0
- %va = shufflevector <2 x bfloat> %elt.head, <2 x bfloat> poison, <2 x i32> zeroinitializer
- %v = call <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1> %m, <2 x bfloat> %va, <2 x bfloat> %vb, i32 %evl)
- ret <2 x bfloat> %v
-}
-
-declare <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1>, <4 x bfloat>, <4 x bfloat>, i32)
-
-define <4 x bfloat> @vpmerge_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpmerge_vv_v4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
-; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
- %v = call <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1> %m, <4 x bfloat> %va, <4 x bfloat> %vb, i32 %evl)
- ret <4 x bfloat> %v
-}
-
-define <4 x bfloat> @vpmerge_vf_v4bf16(bfloat %a, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
-; ZVFH-LABEL: vpmerge_vf_v4bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
-; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vpmerge_vf_v4bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
-; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
-; ZVFHMIN-NEXT: ret
- %elt.head = insertelement <4 x bfloat> poison, bfloat %a, i32 0
- %va = shufflevector <4 x bfloat> %elt.head, <4 x bfloat> poison, <4 x i32> zeroinitializer
- %v = call <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1> %m, <4 x bfloat> %va, <4 x bfloat> %vb, i32 %evl)
- ret <4 x bfloat> %v
-}
-
-declare <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1>, <8 x bfloat>, <8 x bfloat>, i32)
-
-define <8 x bfloat> @vpmerge_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpmerge_vv_v8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
-; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
- %v = call <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1> %m, <8 x bfloat> %va, <8 x bfloat> %vb, i32 %evl)
- ret <8 x bfloat> %v
-}
-
-define <8 x bfloat> @vpmerge_vf_v8bf16(bfloat %a, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
-; ZVFH-LABEL: vpmerge_vf_v8bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetvli zero, a0, e16, m1, tu, ma
-; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vpmerge_vf_v8bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, tu, mu
-; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
-; ZVFHMIN-NEXT: ret
- %elt.head = insertelement <8 x bfloat> poison, bfloat %a, i32 0
- %va = shufflevector <8 x bfloat> %elt.head, <8 x bfloat> poison, <8 x i32> zeroinitializer
- %v = call <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1> %m, <8 x bfloat> %va, <8 x bfloat> %vb, i32 %evl)
- ret <8 x bfloat> %v
-}
-
-declare <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1>, <16 x bfloat>, <16 x bfloat>, i32)
-
-define <16 x bfloat> @vpmerge_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpmerge_vv_v16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
-; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0
-; CHECK-NEXT: vmv2r.v v8, v10
-; CHECK-NEXT: ret
- %v = call <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1> %m, <16 x bfloat> %va, <16 x bfloat> %vb, i32 %evl)
- ret <16 x bfloat> %v
-}
-
-define <16 x bfloat> @vpmerge_vf_v16bf16(bfloat %a, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
-; ZVFH-LABEL: vpmerge_vf_v16bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetvli zero, a0, e16, m2, tu, ma
-; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vpmerge_vf_v16bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, tu, mu
-; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
-; ZVFHMIN-NEXT: ret
- %elt.head = insertelement <16 x bfloat> poison, bfloat %a, i32 0
- %va = shufflevector <16 x bfloat> %elt.head, <16 x bfloat> poison, <16 x i32> zeroinitializer
- %v = call <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1> %m, <16 x bfloat> %va, <16 x bfloat> %vb, i32 %evl)
- ret <16 x bfloat> %v
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
index c5d9cdacae74..0a2ed3eb1ffb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <1 x i1> @llvm.vp.select.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
@@ -683,51 +683,3 @@ define <16 x double> @select_v16f64(<16 x i1> %a, <16 x double> %b, <16 x double
%v = call <16 x double> @llvm.vp.select.v16f64(<16 x i1> %a, <16 x double> %b, <16 x double> %c, i32 %evl)
ret <16 x double> %v
}
-
-declare <2 x bfloat> @llvm.vp.select.v2bf16(<2 x i1>, <2 x bfloat>, <2 x bfloat>, i32)
-
-define <2 x bfloat> @select_v2bf16(<2 x i1> %a, <2 x bfloat> %b, <2 x bfloat> %c, i32 zeroext %evl) {
-; CHECK-LABEL: select_v2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %v = call <2 x bfloat> @llvm.vp.select.v2bf16(<2 x i1> %a, <2 x bfloat> %b, <2 x bfloat> %c, i32 %evl)
- ret <2 x bfloat> %v
-}
-
-declare <4 x bfloat> @llvm.vp.select.v4bf16(<4 x i1>, <4 x bfloat>, <4 x bfloat>, i32)
-
-define <4 x bfloat> @select_v4bf16(<4 x i1> %a, <4 x bfloat> %b, <4 x bfloat> %c, i32 zeroext %evl) {
-; CHECK-LABEL: select_v4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %v = call <4 x bfloat> @llvm.vp.select.v4bf16(<4 x i1> %a, <4 x bfloat> %b, <4 x bfloat> %c, i32 %evl)
- ret <4 x bfloat> %v
-}
-
-declare <8 x bfloat> @llvm.vp.select.v8bf16(<8 x i1>, <8 x bfloat>, <8 x bfloat>, i32)
-
-define <8 x bfloat> @select_v8bf16(<8 x i1> %a, <8 x bfloat> %b, <8 x bfloat> %c, i32 zeroext %evl) {
-; CHECK-LABEL: select_v8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %v = call <8 x bfloat> @llvm.vp.select.v8bf16(<8 x i1> %a, <8 x bfloat> %b, <8 x bfloat> %c, i32 %evl)
- ret <8 x bfloat> %v
-}
-
-declare <16 x bfloat> @llvm.vp.select.v16bf16(<16 x i1>, <16 x bfloat>, <16 x bfloat>, i32)
-
-define <16 x bfloat> @select_v16bf16(<16 x i1> %a, <16 x bfloat> %b, <16 x bfloat> %c, i32 zeroext %evl) {
-; CHECK-LABEL: select_v16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT: ret
- %v = call <16 x bfloat> @llvm.vp.select.v16bf16(<16 x i1> %a, <16 x bfloat> %b, <16 x bfloat> %c, i32 %evl)
- ret <16 x bfloat> %v
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
index 2b9d847a9e87..f8581d8e21b3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
define <vscale x 1 x half> @select_nxv1f16(i1 zeroext %c, <vscale x 1 x half> %a, <vscale x 1 x half> %b) {
@@ -427,183 +427,3 @@ define <vscale x 8 x double> @selectcc_nxv8f64(double %a, double %b, <vscale x 8
%v = select i1 %cmp, <vscale x 8 x double> %c, <vscale x 8 x double> %d
ret <vscale x 8 x double> %v
}
-
-define <vscale x 1 x bfloat> @select_nxv1bf16(i1 zeroext %c, <vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
-; CHECK-LABEL: select_nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %v = select i1 %c, <vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b
- ret <vscale x 1 x bfloat> %v
-}
-
-define <vscale x 1 x bfloat> @selectcc_nxv1bf16(bfloat %a, bfloat %b, <vscale x 1 x bfloat> %c, <vscale x 1 x bfloat> %d) {
-; CHECK-LABEL: selectcc_nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT: feq.s a0, fa4, fa5
-; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %cmp = fcmp oeq bfloat %a, %b
- %v = select i1 %cmp, <vscale x 1 x bfloat> %c, <vscale x 1 x bfloat> %d
- ret <vscale x 1 x bfloat> %v
-}
-
-define <vscale x 2 x bfloat> @select_nxv2bf16(i1 zeroext %c, <vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
-; CHECK-LABEL: select_nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %v = select i1 %c, <vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b
- ret <vscale x 2 x bfloat> %v
-}
-
-define <vscale x 2 x bfloat> @selectcc_nxv2bf16(bfloat %a, bfloat %b, <vscale x 2 x bfloat> %c, <vscale x 2 x bfloat> %d) {
-; CHECK-LABEL: selectcc_nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT: feq.s a0, fa4, fa5
-; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %cmp = fcmp oeq bfloat %a, %b
- %v = select i1 %cmp, <vscale x 2 x bfloat> %c, <vscale x 2 x bfloat> %d
- ret <vscale x 2 x bfloat> %v
-}
-
-define <vscale x 4 x bfloat> @select_nxv4bf16(i1 zeroext %c, <vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
-; CHECK-LABEL: select_nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %v = select i1 %c, <vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b
- ret <vscale x 4 x bfloat> %v
-}
-
-define <vscale x 4 x bfloat> @selectcc_nxv4bf16(bfloat %a, bfloat %b, <vscale x 4 x bfloat> %c, <vscale x 4 x bfloat> %d) {
-; CHECK-LABEL: selectcc_nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT: feq.s a0, fa4, fa5
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %cmp = fcmp oeq bfloat %a, %b
- %v = select i1 %cmp, <vscale x 4 x bfloat> %c, <vscale x 4 x bfloat> %d
- ret <vscale x 4 x bfloat> %v
-}
-
-define <vscale x 8 x bfloat> @select_nxv8bf16(i1 zeroext %c, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
-; CHECK-LABEL: select_nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vmsne.vi v0, v12, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT: ret
- %v = select i1 %c, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b
- ret <vscale x 8 x bfloat> %v
-}
-
-define <vscale x 8 x bfloat> @selectcc_nxv8bf16(bfloat %a, bfloat %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d) {
-; CHECK-LABEL: selectcc_nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT: feq.s a0, fa4, fa5
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vmsne.vi v0, v12, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT: ret
- %cmp = fcmp oeq bfloat %a, %b
- %v = select i1 %cmp, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d
- ret <vscale x 8 x bfloat> %v
-}
-
-define <vscale x 16 x bfloat> @select_nxv16bf16(i1 zeroext %c, <vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
-; CHECK-LABEL: select_nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vmsne.vi v0, v16, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT: ret
- %v = select i1 %c, <vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b
- ret <vscale x 16 x bfloat> %v
-}
-
-define <vscale x 16 x bfloat> @selectcc_nxv16bf16(bfloat %a, bfloat %b, <vscale x 16 x bfloat> %c, <vscale x 16 x bfloat> %d) {
-; CHECK-LABEL: selectcc_nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT: feq.s a0, fa4, fa5
-; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vmsne.vi v0, v16, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT: ret
- %cmp = fcmp oeq bfloat %a, %b
- %v = select i1 %cmp, <vscale x 16 x bfloat> %c, <vscale x 16 x bfloat> %d
- ret <vscale x 16 x bfloat> %v
-}
-
-define <vscale x 32 x bfloat> @select_nxv32bf16(i1 zeroext %c, <vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
-; CHECK-LABEL: select_nxv32bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vmsne.vi v0, v24, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: ret
- %v = select i1 %c, <vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b
- ret <vscale x 32 x bfloat> %v
-}
-
-define <vscale x 32 x bfloat> @selectcc_nxv32bf16(bfloat %a, bfloat %b, <vscale x 32 x bfloat> %c, <vscale x 32 x bfloat> %d) {
-; CHECK-LABEL: selectcc_nxv32bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT: feq.s a0, fa4, fa5
-; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vmsne.vi v0, v24, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: ret
- %cmp = fcmp oeq bfloat %a, %b
- %v = select i1 %cmp, <vscale x 32 x bfloat> %c, <vscale x 32 x bfloat> %d
- ret <vscale x 32 x bfloat> %v
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index e33c795169fa..094e6c9cc754 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFHMIN
declare <vscale x 1 x i1> @llvm.vp.merge.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
@@ -1547,303 +1547,3 @@ define <vscale x 8 x double> @vpmerge_vf_nxv8f64(double %a, <vscale x 8 x double
%v = call <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1> %m, <vscale x 8 x double> %va, <vscale x 8 x double> %vb, i32 %evl)
ret <vscale x 8 x double> %v
}
-
-declare <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, i32)
-
-define <vscale x 1 x bfloat> @vpmerge_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpmerge_vv_nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
-; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
- %v = call <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1> %m, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 %evl)
- ret <vscale x 1 x bfloat> %v
-}
-
-define <vscale x 1 x bfloat> @vpmerge_vf_nxv1bf16(bfloat %a, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32ZVFH-LABEL: vpmerge_vf_nxv1bf16:
-; RV32ZVFH: # %bb.0:
-; RV32ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
-; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV32ZVFH-NEXT: ret
-;
-; RV64ZVFH-LABEL: vpmerge_vf_nxv1bf16:
-; RV64ZVFH: # %bb.0:
-; RV64ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
-; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV64ZVFH-NEXT: ret
-;
-; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv1bf16:
-; RV32ZVFHMIN: # %bb.0:
-; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
-; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
-; RV32ZVFHMIN-NEXT: ret
-;
-; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv1bf16:
-; RV64ZVFHMIN: # %bb.0:
-; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
-; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
-; RV64ZVFHMIN-NEXT: ret
- %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %a, i32 0
- %va = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1> %m, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 %evl)
- ret <vscale x 1 x bfloat> %v
-}
-
-declare <vscale x 2 x bfloat> @llvm.vp.merge.nxv2bf16(<vscale x 2 x i1>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
-
-define <vscale x 2 x bfloat> @vpmerge_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpmerge_vv_nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
-; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
- %v = call <vscale x 2 x bfloat> @llvm.vp.merge.nxv2bf16(<vscale x 2 x i1> %m, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 %evl)
- ret <vscale x 2 x bfloat> %v
-}
-
-define <vscale x 2 x bfloat> @vpmerge_vf_nxv2bf16(bfloat %a, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32ZVFH-LABEL: vpmerge_vf_nxv2bf16:
-; RV32ZVFH: # %bb.0:
-; RV32ZVFH-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
-; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV32ZVFH-NEXT: ret
-;
-; RV64ZVFH-LABEL: vpmerge_vf_nxv2bf16:
-; RV64ZVFH: # %bb.0:
-; RV64ZVFH-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
-; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV64ZVFH-NEXT: ret
-;
-; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv2bf16:
-; RV32ZVFHMIN: # %bb.0:
-; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
-; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
-; RV32ZVFHMIN-NEXT: ret
-;
-; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv2bf16:
-; RV64ZVFHMIN: # %bb.0:
-; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
-; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
-; RV64ZVFHMIN-NEXT: ret
- %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %a, i32 0
- %va = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x bfloat> @llvm.vp.merge.nxv2bf16(<vscale x 2 x i1> %m, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 %evl)
- ret <vscale x 2 x bfloat> %v
-}
-
-declare <vscale x 4 x bfloat> @llvm.vp.merge.nxv4bf16(<vscale x 4 x i1>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
-
-define <vscale x 4 x bfloat> @vpmerge_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpmerge_vv_nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
-; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
- %v = call <vscale x 4 x bfloat> @llvm.vp.merge.nxv4bf16(<vscale x 4 x i1> %m, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 %evl)
- ret <vscale x 4 x bfloat> %v
-}
-
-define <vscale x 4 x bfloat> @vpmerge_vf_nxv4bf16(bfloat %a, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32ZVFH-LABEL: vpmerge_vf_nxv4bf16:
-; RV32ZVFH: # %bb.0:
-; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m1, tu, ma
-; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV32ZVFH-NEXT: ret
-;
-; RV64ZVFH-LABEL: vpmerge_vf_nxv4bf16:
-; RV64ZVFH: # %bb.0:
-; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m1, tu, ma
-; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV64ZVFH-NEXT: ret
-;
-; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv4bf16:
-; RV32ZVFHMIN: # %bb.0:
-; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, tu, mu
-; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
-; RV32ZVFHMIN-NEXT: ret
-;
-; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv4bf16:
-; RV64ZVFHMIN: # %bb.0:
-; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, tu, mu
-; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
-; RV64ZVFHMIN-NEXT: ret
- %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %a, i32 0
- %va = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x bfloat> @llvm.vp.merge.nxv4bf16(<vscale x 4 x i1> %m, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 %evl)
- ret <vscale x 4 x bfloat> %v
-}
-
-declare <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
-
-define <vscale x 8 x bfloat> @vpmerge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpmerge_vv_nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
-; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0
-; CHECK-NEXT: vmv2r.v v8, v10
-; CHECK-NEXT: ret
- %v = call <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1> %m, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 %evl)
- ret <vscale x 8 x bfloat> %v
-}
-
-define <vscale x 8 x bfloat> @vpmerge_vf_nxv8bf16(bfloat %a, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32ZVFH-LABEL: vpmerge_vf_nxv8bf16:
-; RV32ZVFH: # %bb.0:
-; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m2, tu, ma
-; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV32ZVFH-NEXT: ret
-;
-; RV64ZVFH-LABEL: vpmerge_vf_nxv8bf16:
-; RV64ZVFH: # %bb.0:
-; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m2, tu, ma
-; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV64ZVFH-NEXT: ret
-;
-; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv8bf16:
-; RV32ZVFHMIN: # %bb.0:
-; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, tu, mu
-; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
-; RV32ZVFHMIN-NEXT: ret
-;
-; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv8bf16:
-; RV64ZVFHMIN: # %bb.0:
-; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, tu, mu
-; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
-; RV64ZVFHMIN-NEXT: ret
- %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %a, i32 0
- %va = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1> %m, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 %evl)
- ret <vscale x 8 x bfloat> %v
-}
-
-declare <vscale x 16 x bfloat> @llvm.vp.merge.nxv16bf16(<vscale x 16 x i1>, <vscale x 16 x bfloat>, <vscale x 16 x bfloat>, i32)
-
-define <vscale x 16 x bfloat> @vpmerge_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpmerge_vv_nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma
-; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0
-; CHECK-NEXT: vmv4r.v v8, v12
-; CHECK-NEXT: ret
- %v = call <vscale x 16 x bfloat> @llvm.vp.merge.nxv16bf16(<vscale x 16 x i1> %m, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 %evl)
- ret <vscale x 16 x bfloat> %v
-}
-
-define <vscale x 16 x bfloat> @vpmerge_vf_nxv16bf16(bfloat %a, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32ZVFH-LABEL: vpmerge_vf_nxv16bf16:
-; RV32ZVFH: # %bb.0:
-; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m4, tu, ma
-; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV32ZVFH-NEXT: ret
-;
-; RV64ZVFH-LABEL: vpmerge_vf_nxv16bf16:
-; RV64ZVFH: # %bb.0:
-; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m4, tu, ma
-; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV64ZVFH-NEXT: ret
-;
-; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv16bf16:
-; RV32ZVFHMIN: # %bb.0:
-; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32ZVFHMIN-NEXT: vfmv.v.f v16, fa5
-; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, tu, mu
-; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
-; RV32ZVFHMIN-NEXT: ret
-;
-; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv16bf16:
-; RV64ZVFHMIN: # %bb.0:
-; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64ZVFHMIN-NEXT: vfmv.v.f v16, fa5
-; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, tu, mu
-; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
-; RV64ZVFHMIN-NEXT: ret
- %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %a, i32 0
- %va = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
- %v = call <vscale x 16 x bfloat> @llvm.vp.merge.nxv16bf16(<vscale x 16 x i1> %m, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 %evl)
- ret <vscale x 16 x bfloat> %v
-}
-
-declare <vscale x 32 x bfloat> @llvm.vp.merge.nxv32bf16(<vscale x 32 x i1>, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>, i32)
-
-define <vscale x 32 x bfloat> @vpmerge_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpmerge_vv_nxv32bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
-; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
-; CHECK-NEXT: vmv8r.v v8, v16
-; CHECK-NEXT: ret
- %v = call <vscale x 32 x bfloat> @llvm.vp.merge.nxv32bf16(<vscale x 32 x i1> %m, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 %evl)
- ret <vscale x 32 x bfloat> %v
-}
-
-define <vscale x 32 x bfloat> @vpmerge_vf_nxv32bf16(bfloat %a, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32ZVFH-LABEL: vpmerge_vf_nxv32bf16:
-; RV32ZVFH: # %bb.0:
-; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m8, tu, ma
-; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV32ZVFH-NEXT: ret
-;
-; RV64ZVFH-LABEL: vpmerge_vf_nxv32bf16:
-; RV64ZVFH: # %bb.0:
-; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m8, tu, ma
-; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV64ZVFH-NEXT: ret
-;
-; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv32bf16:
-; RV32ZVFHMIN: # %bb.0:
-; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32ZVFHMIN-NEXT: vfmv.v.f v24, fa5
-; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v16, v24
-; RV32ZVFHMIN-NEXT: vmv.v.v v20, v16
-; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, tu, ma
-; RV32ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0
-; RV32ZVFHMIN-NEXT: ret
-;
-; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv32bf16:
-; RV64ZVFHMIN: # %bb.0:
-; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64ZVFHMIN-NEXT: vfmv.v.f v24, fa5
-; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v16, v24
-; RV64ZVFHMIN-NEXT: vmv.v.v v20, v16
-; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, tu, ma
-; RV64ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0
-; RV64ZVFHMIN-NEXT: ret
- %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %a, i32 0
- %va = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
- %v = call <vscale x 32 x bfloat> @llvm.vp.merge.nxv32bf16(<vscale x 32 x i1> %m, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 %evl)
- ret <vscale x 32 x bfloat> %v
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
index 82c2fe3273bd..53b8e4a78b75 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFHMIN
define <vscale x 1 x half> @vfmerge_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %cond) {
@@ -512,219 +512,3 @@ define void @vselect_legalize_regression(<vscale x 16 x double> %a, <vscale x 16
store <vscale x 16 x double> %sel, ptr %out
ret void
}
-
-define <vscale x 1 x bfloat> @vfmerge_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %cond) {
-; CHECK-LABEL: vfmerge_vv_nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %vc = select <vscale x 1 x i1> %cond, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb
- ret <vscale x 1 x bfloat> %vc
-}
-
-define <vscale x 1 x bfloat> @vfmerge_fv_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %cond) {
-; CHECK-ZVFH-LABEL: vfmerge_fv_nxv1bf16:
-; CHECK-ZVFH: # %bb.0:
-; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; CHECK-ZVFH-NEXT: ret
-;
-; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv1bf16:
-; CHECK-ZVFHMIN: # %bb.0:
-; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
-; CHECK-ZVFHMIN-NEXT: ret
- %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
- %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
- %vc = select <vscale x 1 x i1> %cond, <vscale x 1 x bfloat> %splat, <vscale x 1 x bfloat> %va
- ret <vscale x 1 x bfloat> %vc
-}
-
-define <vscale x 2 x bfloat> @vfmerge_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %cond) {
-; CHECK-LABEL: vfmerge_vv_nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %vc = select <vscale x 2 x i1> %cond, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb
- ret <vscale x 2 x bfloat> %vc
-}
-
-define <vscale x 2 x bfloat> @vfmerge_fv_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x i1> %cond) {
-; CHECK-ZVFH-LABEL: vfmerge_fv_nxv2bf16:
-; CHECK-ZVFH: # %bb.0:
-; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; CHECK-ZVFH-NEXT: ret
-;
-; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv2bf16:
-; CHECK-ZVFHMIN: # %bb.0:
-; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-ZVFHMIN-NEXT: vfmv.v.f v9, fa5
-; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
-; CHECK-ZVFHMIN-NEXT: ret
- %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
- %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
- %vc = select <vscale x 2 x i1> %cond, <vscale x 2 x bfloat> %splat, <vscale x 2 x bfloat> %va
- ret <vscale x 2 x bfloat> %vc
-}
-
-define <vscale x 4 x bfloat> @vfmerge_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %cond) {
-; CHECK-LABEL: vfmerge_vv_nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %vc = select <vscale x 4 x i1> %cond, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb
- ret <vscale x 4 x bfloat> %vc
-}
-
-define <vscale x 4 x bfloat> @vfmerge_fv_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x i1> %cond) {
-; CHECK-ZVFH-LABEL: vfmerge_fv_nxv4bf16:
-; CHECK-ZVFH: # %bb.0:
-; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; CHECK-ZVFH-NEXT: ret
-;
-; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv4bf16:
-; CHECK-ZVFHMIN: # %bb.0:
-; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-ZVFHMIN-NEXT: vfmv.v.f v10, fa5
-; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
-; CHECK-ZVFHMIN-NEXT: ret
- %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
- %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
- %vc = select <vscale x 4 x i1> %cond, <vscale x 4 x bfloat> %splat, <vscale x 4 x bfloat> %va
- ret <vscale x 4 x bfloat> %vc
-}
-
-define <vscale x 8 x bfloat> @vfmerge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %cond) {
-; CHECK-LABEL: vfmerge_vv_nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT: ret
- %vc = select <vscale x 8 x i1> %cond, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb
- ret <vscale x 8 x bfloat> %vc
-}
-
-define <vscale x 8 x bfloat> @vfmerge_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %cond) {
-; CHECK-ZVFH-LABEL: vfmerge_fv_nxv8bf16:
-; CHECK-ZVFH: # %bb.0:
-; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; CHECK-ZVFH-NEXT: ret
-;
-; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv8bf16:
-; CHECK-ZVFHMIN: # %bb.0:
-; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-ZVFHMIN-NEXT: vfmv.v.f v12, fa5
-; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
-; CHECK-ZVFHMIN-NEXT: ret
- %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
- %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
- %vc = select <vscale x 8 x i1> %cond, <vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va
- ret <vscale x 8 x bfloat> %vc
-}
-
-define <vscale x 8 x bfloat> @vfmerge_zv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %cond) {
-; CHECK-LABEL: vfmerge_zv_nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT: ret
- %vc = select <vscale x 8 x i1> %cond, <vscale x 8 x bfloat> splat (bfloat zeroinitializer), <vscale x 8 x bfloat> %va
- ret <vscale x 8 x bfloat> %vc
-}
-
-define <vscale x 8 x bfloat> @vmerge_truelhs_nxv8bf16_0(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
-; CHECK-LABEL: vmerge_truelhs_nxv8bf16_0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ret
- %vc = select <vscale x 8 x i1> splat (i1 1), <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb
- ret <vscale x 8 x bfloat> %vc
-}
-
-define <vscale x 8 x bfloat> @vmerge_falselhs_nxv8bf16_0(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
-; CHECK-LABEL: vmerge_falselhs_nxv8bf16_0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmv2r.v v8, v10
-; CHECK-NEXT: ret
- %vc = select <vscale x 8 x i1> zeroinitializer, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb
- ret <vscale x 8 x bfloat> %vc
-}
-
-define <vscale x 16 x bfloat> @vfmerge_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %cond) {
-; CHECK-LABEL: vfmerge_vv_nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT: ret
- %vc = select <vscale x 16 x i1> %cond, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb
- ret <vscale x 16 x bfloat> %vc
-}
-
-define <vscale x 16 x bfloat> @vfmerge_fv_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x i1> %cond) {
-; CHECK-ZVFH-LABEL: vfmerge_fv_nxv16bf16:
-; CHECK-ZVFH: # %bb.0:
-; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; CHECK-ZVFH-NEXT: ret
-;
-; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv16bf16:
-; CHECK-ZVFHMIN: # %bb.0:
-; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-ZVFHMIN-NEXT: vfmv.v.f v16, fa5
-; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
-; CHECK-ZVFHMIN-NEXT: ret
- %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
- %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
- %vc = select <vscale x 16 x i1> %cond, <vscale x 16 x bfloat> %splat, <vscale x 16 x bfloat> %va
- ret <vscale x 16 x bfloat> %vc
-}
-
-define <vscale x 32 x bfloat> @vfmerge_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %cond) {
-; CHECK-LABEL: vfmerge_vv_nxv32bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: ret
- %vc = select <vscale x 32 x i1> %cond, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb
- ret <vscale x 32 x bfloat> %vc
-}
-
-define <vscale x 32 x bfloat> @vfmerge_fv_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x i1> %cond) {
-; CHECK-ZVFH-LABEL: vfmerge_fv_nxv32bf16:
-; CHECK-ZVFH: # %bb.0:
-; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; CHECK-ZVFH-NEXT: ret
-;
-; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv32bf16:
-; CHECK-ZVFHMIN: # %bb.0:
-; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-ZVFHMIN-NEXT: vfmv.v.f v24, fa5
-; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v16, v24
-; CHECK-ZVFHMIN-NEXT: vmv.v.v v20, v16
-; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0
-; CHECK-ZVFHMIN-NEXT: ret
- %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
- %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
- %vc = select <vscale x 32 x i1> %cond, <vscale x 32 x bfloat> %splat, <vscale x 32 x bfloat> %va
- ret <vscale x 32 x bfloat> %vc
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
index d1049e14fa29..ee0617c93148 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfhmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfhmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
@@ -922,75 +922,3 @@ define <vscale x 2 x i1> @select_unknown_T_T(<vscale x 2 x i1> %x, <vscale x 2 x
%a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, <vscale x 2 x i1> %y, i32 %evl)
ret <vscale x 2 x i1> %a
}
-
-declare <vscale x 1 x bfloat> @llvm.vp.select.nxv1bf16(<vscale x 1 x i1>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, i32)
-
-define <vscale x 1 x bfloat> @select_nxv1bf16(<vscale x 1 x i1> %a, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, i32 zeroext %evl) {
-; CHECK-LABEL: select_nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %v = call <vscale x 1 x bfloat> @llvm.vp.select.nxv1bf16(<vscale x 1 x i1> %a, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, i32 %evl)
- ret <vscale x 1 x bfloat> %v
-}
-
-declare <vscale x 2 x bfloat> @llvm.vp.select.nxv2bf16(<vscale x 2 x i1>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
-
-define <vscale x 2 x bfloat> @select_nxv2bf16(<vscale x 2 x i1> %a, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, i32 zeroext %evl) {
-; CHECK-LABEL: select_nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %v = call <vscale x 2 x bfloat> @llvm.vp.select.nxv2bf16(<vscale x 2 x i1> %a, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, i32 %evl)
- ret <vscale x 2 x bfloat> %v
-}
-
-declare <vscale x 4 x bfloat> @llvm.vp.select.nxv4bf16(<vscale x 4 x i1>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
-
-define <vscale x 4 x bfloat> @select_nxv4bf16(<vscale x 4 x i1> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, i32 zeroext %evl) {
-; CHECK-LABEL: select_nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: ret
- %v = call <vscale x 4 x bfloat> @llvm.vp.select.nxv4bf16(<vscale x 4 x i1> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, i32 %evl)
- ret <vscale x 4 x bfloat> %v
-}
-
-declare <vscale x 8 x bfloat> @llvm.vp.select.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
-
-define <vscale x 8 x bfloat> @select_nxv8bf16(<vscale x 8 x i1> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 zeroext %evl) {
-; CHECK-LABEL: select_nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT: ret
- %v = call <vscale x 8 x bfloat> @llvm.vp.select.nxv8bf16(<vscale x 8 x i1> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 %evl)
- ret <vscale x 8 x bfloat> %v
-}
-
-declare <vscale x 16 x bfloat> @llvm.vp.select.nxv16bf16(<vscale x 16 x i1>, <vscale x 16 x bfloat>, <vscale x 16 x bfloat>, i32)
-
-define <vscale x 16 x bfloat> @select_nxv16bf16(<vscale x 16 x i1> %a, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, i32 zeroext %evl) {
-; CHECK-LABEL: select_nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT: ret
- %v = call <vscale x 16 x bfloat> @llvm.vp.select.nxv16bf16(<vscale x 16 x i1> %a, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, i32 %evl)
- ret <vscale x 16 x bfloat> %v
-}
-
-declare <vscale x 32 x bfloat> @llvm.vp.select.nxv32bf16(<vscale x 32 x i1>, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>, i32)
-
-define <vscale x 32 x bfloat> @select_nxv32bf16(<vscale x 32 x i1> %a, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, i32 zeroext %evl) {
-; CHECK-LABEL: select_nxv32bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: ret
- %v = call <vscale x 32 x bfloat> @llvm.vp.select.nxv32bf16(<vscale x 32 x i1> %a, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, i32 %evl)
- ret <vscale x 32 x bfloat> %v
-}
diff --git a/llvm/test/CodeGen/SPIRV/event-wait-ptr-type.ll b/llvm/test/CodeGen/SPIRV/event-wait-ptr-type.ll
index d6fb70bb59a7..ec9afc789944 100644
--- a/llvm/test/CodeGen/SPIRV/event-wait-ptr-type.ll
+++ b/llvm/test/CodeGen/SPIRV/event-wait-ptr-type.ll
@@ -4,16 +4,16 @@
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-; CHECK: %[[#EventTy:]] = OpTypeEvent
-; CHECK: %[[#StructEventTy:]] = OpTypeStruct %[[#EventTy]]
-; CHECK: %[[#GenPtrStructEventTy:]] = OpTypePointer Generic %[[#StructEventTy]]
-; CHECK: %[[#FunPtrStructEventTy:]] = OpTypePointer Function %[[#StructEventTy]]
-; CHECK: %[[#GenPtrEventTy:]] = OpTypePointer Generic %[[#EventTy:]]
+; CHECK-DAG: %[[#EventTy:]] = OpTypeEvent
+; CHECK-DAG: %[[#StructEventTy:]] = OpTypeStruct %[[#EventTy]]
+; CHECK-DAG: %[[#FunPtrStructEventTy:]] = OpTypePointer Function %[[#StructEventTy]]
+; CHECK-DAG: %[[#GenPtrEventTy:]] = OpTypePointer Generic %[[#EventTy]]
+; CHECK-DAG: %[[#FunPtrEventTy:]] = OpTypePointer Function %[[#EventTy]]
; CHECK: OpFunction
; CHECK: %[[#Var:]] = OpVariable %[[#FunPtrStructEventTy]] Function
-; CHECK-NEXT: %[[#AddrspacecastVar:]] = OpPtrCastToGeneric %[[#GenPtrStructEventTy]] %[[#Var]]
-; CHECK-NEXT: %[[#BitcastVar:]] = OpBitcast %[[#GenPtrEventTy]] %[[#AddrspacecastVar]]
-; CHECK-NEXT: OpGroupWaitEvents %[[#]] %[[#]] %[[#BitcastVar]]
+; CHECK-NEXT: %[[#FunEvent:]] = OpBitcast %[[#FunPtrEventTy]] %[[#Var]]
+; CHECK-NEXT: %[[#GenEvent:]] = OpPtrCastToGeneric %[[#GenPtrEventTy]] %[[#FunEvent]]
+; CHECK-NEXT: OpGroupWaitEvents %[[#]] %[[#]] %[[#GenEvent]]
%"class.sycl::_V1::device_event" = type { target("spirv.Event") }
diff --git a/llvm/test/CodeGen/SPIRV/passes/SPIRVEmitIntrinsics-no-duplicate-spv_assign_type.ll b/llvm/test/CodeGen/SPIRV/passes/SPIRVEmitIntrinsics-no-duplicate-spv_assign_type.ll
index 7056b9cb1230..9db4f26a27d4 100644
--- a/llvm/test/CodeGen/SPIRV/passes/SPIRVEmitIntrinsics-no-duplicate-spv_assign_type.ll
+++ b/llvm/test/CodeGen/SPIRV/passes/SPIRVEmitIntrinsics-no-duplicate-spv_assign_type.ll
@@ -3,9 +3,9 @@
; CHECK: *** IR Dump After SPIRV emit intrinsics (emit-intrinsics) ***
define spir_kernel void @test(ptr addrspace(1) %srcimg) {
-; CHECK: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) undef)
+; CHECK: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) poison)
%call1 = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1) %srcimg)
-; CHECK-NOT: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) undef)
+; CHECK-NOT: call void @llvm.spv.assign.type.p1(ptr addrspace(1) %srcimg, metadata target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) poison)
%call2 = call spir_func <2 x i32> @_Z13get_image_dim14ocl_image2d_ro(ptr addrspace(1) %srcimg)
ret void
; CHECK: }
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll
new file mode 100644
index 000000000000..96d6016083f0
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy-strided.ll
@@ -0,0 +1,36 @@
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-SPIRV-DAG: %[[#LongTy:]] = OpTypeInt 64 0
+; CHECK-SPIRV-DAG: %[[#IntTy:]] = OpTypeInt 32 0
+; CHECK-SPIRV-DAG: %[[#EventTy:]] = OpTypeEvent
+; CHECK-SPIRV-DAG: %[[#Scope:]] = OpConstant %[[#IntTy]] 2
+; CHECK-SPIRV-DAG: %[[#Num:]] = OpConstant %[[#LongTy]] 123
+; CHECK-SPIRV-DAG: %[[#Null:]] = OpConstantNull
+; CHECK-SPIRV-DAG: %[[#Stride:]] = OpConstant %[[#LongTy]] 1
+; CHECK-SPIRV-DAG: %[[#GenPtrEventTy:]] = OpTypePointer Generic %[[#EventTy]]
+; CHECK-SPIRV-DAG: %[[#FunPtrEventTy:]] = OpTypePointer Function %[[#EventTy]]
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV: %[[#Var:]] = OpVariable %[[#]] Function
+; CHECK-SPIRV: %[[#ResEvent:]] = OpGroupAsyncCopy %[[#EventTy]] %[[#Scope]] %[[#Null]] %[[#Null]] %[[#Num]] %[[#Stride]] %[[#Null]]
+; CHECK-SPIRV: %[[#VarPtrEvent:]] = OpBitcast %[[#FunPtrEventTy]] %[[#Var]]
+; CHECK-SPIRV: OpStore %[[#VarPtrEvent]] %[[#ResEvent]]
+; CHECK-SPIRV: %[[#VarPtrEvent2:]] = OpBitcast %[[#FunPtrEventTy]] %[[#Var]]
+; CHECK-SPIRV: %[[#PtrEventGen:]] = OpPtrCastToGeneric %[[#]] %[[#VarPtrEvent2]]
+; CHECK-SPIRV: OpGroupWaitEvents %[[#Scope]] %[[#Num]] %[[#PtrEventGen]]
+; CHECK-SPIRV: OpFunctionEnd
+
+define spir_kernel void @foo() {
+ %event = alloca ptr, align 8
+ %call = call spir_func ptr @_Z29async_work_group_strided_copyPU3AS3hPU3AS1Khmm9ocl_event(ptr null, ptr null, i64 123, i64 1, ptr null)
+ store ptr %call, ptr %event, align 8
+ %event.ascast = addrspacecast ptr %event to ptr addrspace(4)
+ call spir_func void @_Z17wait_group_eventsiPU3AS49ocl_event(i64 123, ptr addrspace(4) %event.ascast)
+ ret void
+}
+
+declare spir_func ptr @_Z29async_work_group_strided_copyPU3AS3hPU3AS1Khmm9ocl_event(ptr, ptr, i64, i64, ptr)
+declare spir_func void @_Z17wait_group_eventsiPU3AS49ocl_event(i64, ptr addrspace(4))
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/check_ro_qualifier.ll b/llvm/test/CodeGen/SPIRV/transcoding/check_ro_qualifier.ll
index 824ca1b2d692..6f61aba23a46 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/check_ro_qualifier.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/check_ro_qualifier.ll
@@ -1,5 +1,5 @@
; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
-; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
; CHECK-SPIRV: %[[#IMAGE_TYPE:]] = OpTypeImage
; CHECK-SPIRV: %[[#IMAGE_ARG:]] = OpFunctionParameter %[[#IMAGE_TYPE]]
diff --git a/llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll b/llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll
index f9b3757bb6d2..6392452e06cd 100644
--- a/llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll
@@ -20,50 +20,35 @@ define void @convert_vptblock(ptr %pchTarget, i16 signext %iTargetStride, ptr %p
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: ldrd r4, r5, [sp, #88]
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: cmp.w r10, #8
-; CHECK-NEXT: mov.w r0, #1
-; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: mov.w r11, #0
-; CHECK-NEXT: it ge
-; CHECK-NEXT: movge r3, #8
; CHECK-NEXT: vidup.u16 q0, r8, #4
-; CHECK-NEXT: sub.w r3, r10, r3
; CHECK-NEXT: vmov.i32 q1, #0x0
-; CHECK-NEXT: adds r3, #7
; CHECK-NEXT: vmov.i16 q2, #0x100
; CHECK-NEXT: vmov.i16 q3, #0xff
-; CHECK-NEXT: add.w r9, r0, r3, lsr #3
; CHECK-NEXT: .LBB0_2: @ %for.body
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB0_3 Depth 2
-; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r6, r8
; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: dls lr, r9
+; CHECK-NEXT: dlstp.16 lr, r10
; CHECK-NEXT: .LBB0_3: @ %do.body
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT: vctp.16 r3
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vldrbt.u16 q5, [r2, q4]
+; CHECK-NEXT: vldrb.u16 q5, [r2, q4]
; CHECK-NEXT: vmul.i16 q4, q5, r5
; CHECK-NEXT: vshr.u16 q4, q4, #8
; CHECK-NEXT: vsub.i16 q5, q2, q4
; CHECK-NEXT: vpt.i16 eq, q4, q3
; CHECK-NEXT: vmovt q5, q1
-; CHECK-NEXT: vctp.16 r3
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vldrbt.u16 q6, [r0]
+; CHECK-NEXT: vldrb.u16 q6, [r0]
; CHECK-NEXT: vsub.i16 q4, q2, q5
-; CHECK-NEXT: subs r3, #8
; CHECK-NEXT: vmul.i16 q5, q5, q6
; CHECK-NEXT: vmla.i16 q5, q4, r4
; CHECK-NEXT: vshr.u16 q4, q5, #8
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrbt.16 q4, [r0], #8
+; CHECK-NEXT: vstrb.16 q4, [r0], #8
; CHECK-NEXT: vidup.u16 q4, r6, #4
-; CHECK-NEXT: le lr, .LBB0_3
+; CHECK-NEXT: letp lr, .LBB0_3
; CHECK-NEXT: @ %bb.4: @ %do.end
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: add.w r0, r11, #1
diff --git a/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll b/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll
new file mode 100644
index 000000000000..80f3db0e52e0
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll
@@ -0,0 +1,484 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s
+; REQUIRES: webassembly-registered-target
+target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
+target triple = "wasm32-unknown-unknown"
+
+; Check the variables are lowered to the locations this target expects
+
+; The types show the call frames
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %single_v4f32.vararg = type <{ <4 x float> }>
+; CHECK: %single_v8f32.vararg = type <{ <8 x float> }>
+; CHECK: %single_v16f32.vararg = type <{ <16 x float> }>
+; CHECK: %single_v32f32.vararg = type <{ <32 x float> }>
+; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %i32_libcS.vararg = type <{ i32, ptr }>
+; CHECK: %libcS_i32.vararg = type <{ ptr, i32 }>
+; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }>
+; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }>
+; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }>
+; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }>
+; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }>
+; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }>
+; CHECK: %i32_v32f32.vararg = type <{ i32, [124 x i8], <32 x float> }>
+; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }>
+; CHECK: %fptr_single_i32.vararg = type <{ i32 }>
+; CHECK: %fptr_libcS.vararg = type <{ ptr }>
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+@vararg_ptr = hidden global ptr @vararg, align 4
+
+define hidden void @copy(ptr noundef %va) {
+; CHECK-LABEL: define {{[^@]+}}@copy(ptr noundef %va) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va.addr = alloca ptr, align 4
+; CHECK-NEXT: %cp = alloca ptr, align 4
+; CHECK-NEXT: store ptr %va, ptr %va.addr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr %cp, ptr %va.addr, i32 4, i1 false)
+; CHECK-NEXT: %0 = load ptr, ptr %cp, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp)
+; CHECK-NEXT: ret void
+;
+entry:
+ %va.addr = alloca ptr, align 4
+ %cp = alloca ptr, align 4
+ store ptr %va, ptr %va.addr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp)
+ call void @llvm.va_copy.p0(ptr nonnull %cp, ptr nonnull %va.addr)
+ %0 = load ptr, ptr %cp, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp)
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare void @valist(ptr noundef)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+define hidden void @start_once(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_once(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s)
+; CHECK-NEXT: store ptr %varargs, ptr %s, align 4
+; CHECK-NEXT: %0 = load ptr, ptr %s, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s)
+ call void @llvm.va_start.p0(ptr nonnull %s)
+ %0 = load ptr, ptr %s, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s)
+ ret void
+}
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+define hidden void @start_twice(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_twice(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s0 = alloca ptr, align 4
+; CHECK-NEXT: %s1 = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1)
+; CHECK-NEXT: store ptr %varargs, ptr %s0, align 4
+; CHECK-NEXT: %0 = load ptr, ptr %s0, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: store ptr %varargs, ptr %s1, align 4
+; CHECK-NEXT: %1 = load ptr, ptr %s1, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s0 = alloca ptr, align 4
+ %s1 = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.va_start.p0(ptr nonnull %s0)
+ %0 = load ptr, ptr %s0, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s0)
+ call void @llvm.va_start.p0(ptr nonnull %s1)
+ %1 = load ptr, ptr %s1, align 4
+ call void @valist(ptr noundef %1)
+ call void @llvm.va_end.p0(ptr %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0)
+ ret void
+}
+
+define hidden void @single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x)
+ ret void
+}
+
+declare void @vararg(...)
+
+define hidden void @single_double(double noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x)
+ ret void
+}
+
+define hidden void @single_v4f32(<4 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v8f32(<8 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v16f32(<16 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v32f32(<32 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v32f32(<32 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 128
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x)
+ ret void
+}
+
+define hidden void @i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store double %y, ptr %1, align 8
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y)
+ ret void
+}
+
+define hidden void @double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8
+; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 16
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %y, i64 24, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store ptr %IndirectAlloca, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 8 %y)
+ ret void
+}
+
+define hidden void @libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8
+; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %x, i64 24, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store ptr %IndirectAlloca, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(ptr noundef nonnull byval(%struct.libcS) align 8 %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <4 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 20, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v8f32(i32 noundef %x, <8 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <8 x float> %y, ptr %1, align 32
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <8 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v8f32_i32(<8 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 36, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 36, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v16f32(i32 noundef %x, <16 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <16 x float> %y, ptr %1, align 64
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <16 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v16f32_i32(<16 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 68, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 68, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v32f32(i32 noundef %x, <32 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, <32 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 128
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 256, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <32 x float> %y, ptr %1, align 128
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 256, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <32 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v32f32_i32(<32 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(<32 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 128
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 132, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128
+; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 132, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @fptr_single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_single_i32.vararg, align 16
+; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: call void %0(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(i32 noundef %x)
+ ret void
+}
+
+define hidden void @fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_libcS.vararg, align 16
+; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 4
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %x, i64 24, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store ptr %IndirectAlloca, ptr %1, align 4
+; CHECK-NEXT: call void %0(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(ptr noundef nonnull byval(%struct.libcS) align 8 %x)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index 761a75418a00..67388b688e3b 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -11788,27 +11788,35 @@ define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: minnum_intrinsic_v4f32:
; NO-SIMD128: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: call $push0=, fminf, $4, $8
-; NO-SIMD128-NEXT: f32.store 12($0), $pop0
-; NO-SIMD128-NEXT: call $push1=, fminf, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: call $push2=, fminf, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop2
-; NO-SIMD128-NEXT: call $push3=, fminf, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop3
+; NO-SIMD128-NEXT: f32.lt $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.lt $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.lt $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.lt $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: minnum_intrinsic_v4f32:
; NO-SIMD128-FAST: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
-; NO-SIMD128-FAST-NEXT: call $push0=, fminf, $1, $5
-; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0
-; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $2, $6
-; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7
-; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.lt $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $1, $5, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.lt $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $2, $6, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $3, $7, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $4, $8, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %a
@@ -11830,26 +11838,26 @@ define <4 x float> @minnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: minnum_nsz_intrinsic_v4f32:
; NO-SIMD128: .functype minnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: call $push0=, fminf, $4, $8
+; NO-SIMD128-NEXT: f32.min $push0=, $4, $8
; NO-SIMD128-NEXT: f32.store 12($0), $pop0
-; NO-SIMD128-NEXT: call $push1=, fminf, $3, $7
+; NO-SIMD128-NEXT: f32.min $push1=, $3, $7
; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: call $push2=, fminf, $2, $6
+; NO-SIMD128-NEXT: f32.min $push2=, $2, $6
; NO-SIMD128-NEXT: f32.store 4($0), $pop2
-; NO-SIMD128-NEXT: call $push3=, fminf, $1, $5
+; NO-SIMD128-NEXT: f32.min $push3=, $1, $5
; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: minnum_nsz_intrinsic_v4f32:
; NO-SIMD128-FAST: .functype minnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
-; NO-SIMD128-FAST-NEXT: call $push0=, fminf, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.min $push0=, $1, $5
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0
-; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.min $push1=, $2, $6
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.min $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.min $push3=, $4, $8
; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = call nnan nsz <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y)
@@ -11875,16 +11883,16 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) {
; NO-SIMD128: .functype fminnumv432_non_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-NEXT: call $push1=, fminf, $4, $pop0
+; NO-SIMD128-NEXT: f32.min $push1=, $4, $pop0
; NO-SIMD128-NEXT: f32.store 12($0), $pop1
; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-NEXT: call $push2=, fminf, $3, $pop7
+; NO-SIMD128-NEXT: f32.min $push2=, $3, $pop7
; NO-SIMD128-NEXT: f32.store 8($0), $pop2
; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-NEXT: call $push3=, fminf, $2, $pop6
+; NO-SIMD128-NEXT: f32.min $push3=, $2, $pop6
; NO-SIMD128-NEXT: f32.store 4($0), $pop3
; NO-SIMD128-NEXT: f32.const $push5=, -0x1p0
-; NO-SIMD128-NEXT: call $push4=, fminf, $1, $pop5
+; NO-SIMD128-NEXT: f32.min $push4=, $1, $pop5
; NO-SIMD128-NEXT: f32.store 0($0), $pop4
; NO-SIMD128-NEXT: return
;
@@ -11892,16 +11900,16 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) {
; NO-SIMD128-FAST: .functype fminnumv432_non_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.min $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $2, $pop7
+; NO-SIMD128-FAST-NEXT: f32.min $push2=, $2, $pop7
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2
; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $3, $pop6
+; NO-SIMD128-FAST-NEXT: f32.min $push3=, $3, $pop6
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3
; NO-SIMD128-FAST-NEXT: f32.const $push5=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $4, $pop5
+; NO-SIMD128-FAST-NEXT: f32.min $push4=, $4, $pop5
; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop4
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float -1.0, float -1.0, float -1.0>)
@@ -11979,34 +11987,38 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) {
; NO-SIMD128: .functype fminnumv432_one_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-NEXT: call $push1=, fminf, $4, $pop0
+; NO-SIMD128-NEXT: f32.min $push1=, $4, $pop0
; NO-SIMD128-NEXT: f32.store 12($0), $pop1
-; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-NEXT: call $push2=, fminf, $3, $pop7
+; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-NEXT: f32.min $push2=, $3, $pop9
; NO-SIMD128-NEXT: f32.store 8($0), $pop2
; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0
-; NO-SIMD128-NEXT: call $push4=, fminf, $2, $pop3
-; NO-SIMD128-NEXT: f32.store 4($0), $pop4
-; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-NEXT: call $push5=, fminf, $1, $pop6
-; NO-SIMD128-NEXT: f32.store 0($0), $pop5
+; NO-SIMD128-NEXT: f32.const $push8=, 0x0p0
+; NO-SIMD128-NEXT: f32.lt $push4=, $2, $pop8
+; NO-SIMD128-NEXT: f32.select $push5=, $2, $pop3, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
+; NO-SIMD128-NEXT: f32.min $push6=, $1, $pop7
+; NO-SIMD128-NEXT: f32.store 0($0), $pop6
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: fminnumv432_one_zero_intrinsic:
; NO-SIMD128-FAST: .functype fminnumv432_one_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.min $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0
-; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $2, $pop2
-; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-FAST-NEXT: f32.min $push2=, $3, $pop9
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
+; NO-SIMD128-FAST-NEXT: f32.const $push3=, 0x0p0
+; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x0p0
+; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $2, $pop8
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $2, $pop3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop5
; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $3, $pop7
-; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4
-; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $pop6
-; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.min $push6=, $4, $pop7
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop6
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float 0.0, float -1.0, float -1.0>)
ret <4 x float> %a
@@ -12126,27 +12138,35 @@ define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: maxnum_intrinsic_v4f32:
; NO-SIMD128: .functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: call $push0=, fmaxf, $4, $8
-; NO-SIMD128-NEXT: f32.store 12($0), $pop0
-; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: call $push2=, fmaxf, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop2
-; NO-SIMD128-NEXT: call $push3=, fmaxf, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop3
+; NO-SIMD128-NEXT: f32.gt $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.gt $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.gt $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.gt $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: maxnum_intrinsic_v4f32:
; NO-SIMD128-FAST: .functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
-; NO-SIMD128-FAST-NEXT: call $push0=, fmaxf, $1, $5
-; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0
-; NO-SIMD128-FAST-NEXT: call $push1=, fmaxf, $2, $6
-; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7
-; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.gt $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $1, $5, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.gt $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $2, $6, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.gt $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $3, $7, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.gt $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $4, $8, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %a
@@ -12168,26 +12188,26 @@ define <4 x float> @maxnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: maxnum_nsz_intrinsic_v4f32:
; NO-SIMD128: .functype maxnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: call $push0=, fmaxf, $4, $8
+; NO-SIMD128-NEXT: f32.max $push0=, $4, $8
; NO-SIMD128-NEXT: f32.store 12($0), $pop0
-; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $7
+; NO-SIMD128-NEXT: f32.max $push1=, $3, $7
; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: call $push2=, fmaxf, $2, $6
+; NO-SIMD128-NEXT: f32.max $push2=, $2, $6
; NO-SIMD128-NEXT: f32.store 4($0), $pop2
-; NO-SIMD128-NEXT: call $push3=, fmaxf, $1, $5
+; NO-SIMD128-NEXT: f32.max $push3=, $1, $5
; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: maxnum_nsz_intrinsic_v4f32:
; NO-SIMD128-FAST: .functype maxnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
-; NO-SIMD128-FAST-NEXT: call $push0=, fmaxf, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.max $push0=, $1, $5
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0
-; NO-SIMD128-FAST-NEXT: call $push1=, fmaxf, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.max $push1=, $2, $6
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.max $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.max $push3=, $4, $8
; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = call nnan nsz <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y)
@@ -12265,34 +12285,38 @@ define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float>
; NO-SIMD128: .functype maxnum_one_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-NEXT: call $push1=, fmaxf, $4, $pop0
+; NO-SIMD128-NEXT: f32.max $push1=, $4, $pop0
; NO-SIMD128-NEXT: f32.store 12($0), $pop1
-; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-NEXT: call $push2=, fmaxf, $3, $pop7
+; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-NEXT: f32.max $push2=, $3, $pop9
; NO-SIMD128-NEXT: f32.store 8($0), $pop2
; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0
-; NO-SIMD128-NEXT: call $push4=, fmaxf, $2, $pop3
-; NO-SIMD128-NEXT: f32.store 4($0), $pop4
-; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-NEXT: call $push5=, fmaxf, $1, $pop6
-; NO-SIMD128-NEXT: f32.store 0($0), $pop5
+; NO-SIMD128-NEXT: f32.const $push8=, 0x0p0
+; NO-SIMD128-NEXT: f32.gt $push4=, $2, $pop8
+; NO-SIMD128-NEXT: f32.select $push5=, $2, $pop3, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
+; NO-SIMD128-NEXT: f32.max $push6=, $1, $pop7
+; NO-SIMD128-NEXT: f32.store 0($0), $pop6
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: maxnum_one_zero_intrinsic_v4f32:
; NO-SIMD128-FAST: .functype maxnum_one_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push1=, fmaxf, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.max $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0
-; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2
-; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-FAST-NEXT: f32.max $push2=, $3, $pop9
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
+; NO-SIMD128-FAST-NEXT: f32.const $push3=, 0x0p0
+; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x0p0
+; NO-SIMD128-FAST-NEXT: f32.gt $push4=, $2, $pop8
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $2, $pop3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop5
; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop7
-; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4
-; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $pop6
-; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.max $push6=, $4, $pop7
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop6
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float 0.0, float -1.0, float -1.0>)
ret <4 x float> %a
@@ -12317,16 +12341,16 @@ define <4 x float> @maxnum_non_zero_intrinsic_v4f32(<4 x float> %x, <4 x float>
; NO-SIMD128: .functype maxnum_non_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-NEXT: call $push1=, fmaxf, $4, $pop0
+; NO-SIMD128-NEXT: f32.max $push1=, $4, $pop0
; NO-SIMD128-NEXT: f32.store 12($0), $pop1
; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-NEXT: call $push2=, fmaxf, $3, $pop7
+; NO-SIMD128-NEXT: f32.max $push2=, $3, $pop7
; NO-SIMD128-NEXT: f32.store 8($0), $pop2
; NO-SIMD128-NEXT: f32.const $push3=, 0x1p0
-; NO-SIMD128-NEXT: call $push4=, fmaxf, $2, $pop3
+; NO-SIMD128-NEXT: f32.max $push4=, $2, $pop3
; NO-SIMD128-NEXT: f32.store 4($0), $pop4
; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-NEXT: call $push5=, fmaxf, $1, $pop6
+; NO-SIMD128-NEXT: f32.max $push5=, $1, $pop6
; NO-SIMD128-NEXT: f32.store 0($0), $pop5
; NO-SIMD128-NEXT: return
;
@@ -12334,16 +12358,16 @@ define <4 x float> @maxnum_non_zero_intrinsic_v4f32(<4 x float> %x, <4 x float>
; NO-SIMD128-FAST: .functype maxnum_non_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push1=, fmaxf, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.max $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x1p0
-; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.max $push3=, $2, $pop2
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop7
+; NO-SIMD128-FAST-NEXT: f32.max $push4=, $3, $pop7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4
; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.max $push5=, $4, $pop6
; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float 1.0, float -1.0, float -1.0>)
diff --git a/llvm/test/CodeGen/WebAssembly/vararg-frame.ll b/llvm/test/CodeGen/WebAssembly/vararg-frame.ll
new file mode 100644
index 000000000000..5c76040325cc
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/vararg-frame.ll
@@ -0,0 +1,526 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+; REQUIRES: webassembly-registered-target
+target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
+target triple = "wasm32-unknown-unknown"
+
+; Function Attrs: nounwind
+define void @pass_s0() {
+; CHECK-LABEL: pass_s0:
+; CHECK: .functype pass_s0 () -> ()
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: call sink
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink()
+ ret void
+}
+
+declare void @sink(...)
+
+; Function Attrs: nounwind
+define void @pass_s1(i8 %x.coerce) {
+; CHECK-LABEL: pass_s1:
+; CHECK: .functype pass_s1 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i8 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_s2(i16 %x.coerce) {
+; CHECK-LABEL: pass_s2:
+; CHECK: .functype pass_s2 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i16 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_s3(i32 %x.coerce) {
+; CHECK-LABEL: pass_s3:
+; CHECK: .functype pass_s3 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_s4(i64 %x.coerce) {
+; CHECK-LABEL: pass_s4:
+; CHECK: .functype pass_s4 (i64) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i64 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_s5(<4 x i32> noundef %x) {
+; CHECK-LABEL: pass_s5:
+; CHECK: .functype pass_s5 (i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 12
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(<4 x i32> noundef %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s0(i32 noundef %i) {
+; CHECK-LABEL: pass_int_s0:
+; CHECK: .functype pass_int_s0 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s1(i32 noundef %i, i8 %x.coerce) {
+; CHECK-LABEL: pass_int_s1:
+; CHECK: .functype pass_int_s1 (i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i8 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s2(i32 noundef %i, i16 %x.coerce) {
+; CHECK-LABEL: pass_int_s2:
+; CHECK: .functype pass_int_s2 (i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i16 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s3(i32 noundef %i, i32 %x.coerce) {
+; CHECK-LABEL: pass_int_s3:
+; CHECK: .functype pass_int_s3 (i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i32 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s4(i32 noundef %i, i64 %x.coerce) {
+; CHECK-LABEL: pass_int_s4:
+; CHECK: .functype pass_int_s4 (i32, i64) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.store 8
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i64 %x.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s5(i32 noundef %i, <4 x i32> noundef %x) {
+; CHECK-LABEL: pass_int_s5:
+; CHECK: .functype pass_int_s5 (i32, i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 28
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 20
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, <4 x i32> noundef %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_asc(i8 %x1.coerce, i16 %x2.coerce, i32 %x3.coerce, i64 %x4.coerce, <4 x i32> noundef %x5) {
+; CHECK-LABEL: pass_asc:
+; CHECK: .functype pass_asc (i32, i32, i32, i64, i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 44
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 40
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 36
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i8 %x1.coerce, i16 %x2.coerce, i32 %x3.coerce, i64 %x4.coerce, <4 x i32> noundef %x5)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_dsc(<4 x i32> noundef %x0, i64 %x1.coerce, i32 %x2.coerce, i16 %x3.coerce, i8 %x4.coerce) {
+; CHECK-LABEL: pass_dsc:
+; CHECK: .functype pass_dsc (i32, i32, i32, i32, i64, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 28
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 12
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(<4 x i32> noundef %x0, i64 %x1.coerce, i32 %x2.coerce, i16 %x3.coerce, i8 %x4.coerce)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_multiple(i32 noundef %i, i8 %x1.coerce, i16 %x2.coerce, i32 %x3.coerce, i64 %x4.coerce, <4 x i32> noundef %x5) {
+; CHECK-LABEL: pass_multiple:
+; CHECK: .functype pass_multiple (i32, i32, i32, i32, i64, i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 9
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.store 40
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 36
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 32
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 28
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 20
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i16 %x2.coerce, i64 %x4.coerce)
+ tail call void (...) @sink(i32 noundef %i, i8 %x1.coerce, i32 %x3.coerce, <4 x i32> noundef %x5)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
index 1782e5252870..55b86cadfe30 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
@@ -28,35 +28,17 @@ define half @test_fminimum(half %x, half %y) {
define <8 x half> @test_fminimum_scalarize(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fminimum_scalarize:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vminsh %xmm2, %xmm3, %xmm2
-; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
-; CHECK-NEXT: vshufps {{.*#+}} xmm4 = xmm0[3,3,3,3]
-; CHECK-NEXT: vminsh %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm4 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vminsh %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vshufpd {{.*#+}} xmm4 = xmm1[1,0]
-; CHECK-NEXT: vshufpd {{.*#+}} xmm5 = xmm0[1,0]
-; CHECK-NEXT: vminsh %xmm4, %xmm5, %xmm4
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; CHECK-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; CHECK-NEXT: vpsrlq $48, %xmm1, %xmm3
-; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm4
-; CHECK-NEXT: vminsh %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
-; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; CHECK-NEXT: vminsh %xmm4, %xmm5, %xmm4
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm4
-; CHECK-NEXT: vpsrld $16, %xmm1, %xmm1
-; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0
-; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
-; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT: vcmpltph %xmm1, %xmm0, %k1
+; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm2 {%k1}
+; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm3 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; CHECK-NEXT: vpcmpeqw %xmm3, %xmm0, %k1
+; CHECK-NEXT: vpblendmw %xmm0, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: vpcmpeqw %xmm3, %xmm1, %k1
+; CHECK-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqph %xmm1, %xmm2, %k1
+; CHECK-NEXT: vmovdqu16 %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%r = call <8 x half> @llvm.minimum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %r
@@ -134,35 +116,16 @@ define half @test_fmaximum(half %x, half %y) {
define <8 x half> @test_fmaximum_scalarize(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fmaximum_scalarize:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vmaxsh %xmm2, %xmm3, %xmm2
-; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
-; CHECK-NEXT: vshufps {{.*#+}} xmm4 = xmm0[3,3,3,3]
-; CHECK-NEXT: vmaxsh %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm4 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vmaxsh %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vshufpd {{.*#+}} xmm4 = xmm1[1,0]
-; CHECK-NEXT: vshufpd {{.*#+}} xmm5 = xmm0[1,0]
-; CHECK-NEXT: vmaxsh %xmm4, %xmm5, %xmm4
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; CHECK-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; CHECK-NEXT: vpsrlq $48, %xmm1, %xmm3
-; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm4
-; CHECK-NEXT: vmaxsh %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
-; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; CHECK-NEXT: vmaxsh %xmm4, %xmm5, %xmm4
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm4
-; CHECK-NEXT: vpsrld $16, %xmm1, %xmm1
-; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0
-; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
-; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT: vcmpltph %xmm0, %xmm1, %k1
+; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm2 {%k1}
+; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
+; CHECK-NEXT: vpblendmw %xmm0, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
+; CHECK-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqph %xmm1, %xmm2, %k1
+; CHECK-NEXT: vmovdqu16 %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%r = call <8 x half> @llvm.maximum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %r
diff --git a/llvm/test/DebugInfo/X86/sdag-order.ll b/llvm/test/DebugInfo/X86/sdag-order.ll
new file mode 100644
index 000000000000..f959a8065679
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/sdag-order.ll
@@ -0,0 +1,46 @@
+; RUN: llc %s --stop-after=finalize-isel -o - | FileCheck %s
+
+;; Check the DBG_VALUE which is salvaged from the dbg.value using an otherwise
+;; unused value is emitted at the correct position in the function.
+;; Prior (-) to patching (+), these DBG_VALUEs would sink to the bottom of the
+;; function:
+;; │ bb.1.if.then:
+;; │- $rax = COPY %1
+;; │ DBG_VALUE 0, $noreg, !9, !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value)
+;; │+ $rax = COPY %1
+;; │ RET 0, $rax
+
+; CHECK: bb.1.if.then:
+; CHECK-NEXT: DBG_VALUE 0, $noreg, ![[#]], !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value)
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @badger(ptr sret(i64) %sret) !dbg !5 {
+entry:
+ %f.i = getelementptr i8, ptr null, i64 4
+ br label %if.then
+
+if.then: ; preds = %entry
+ tail call void @llvm.dbg.value(metadata ptr %f.i, metadata !9, metadata !DIExpression()), !dbg !11
+ ret void
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.debugify = !{!2, !3}
+!llvm.module.flags = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "test.ll", directory: "/")
+!2 = !{i32 3}
+!3 = !{i32 1}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "_ZNK1d1gEv", linkageName: "_ZNK1d1gEv", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8)
+!6 = !DISubroutineType(types: !7)
+!7 = !{}
+!8 = !{!9}
+!9 = !DILocalVariable(name: "1", scope: !5, file: !1, line: 1, type: !10)
+!10 = !DIBasicType(name: "ty64", size: 64, encoding: DW_ATE_unsigned)
+!11 = !DILocation(line: 5, column: 1, scope: !5)
diff --git a/llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test b/llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test
index 5141ff6ce322..9e46570783c9 100644
--- a/llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test
+++ b/llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test
@@ -1,4 +1,3 @@
-# REQUIRES: shell
# Ensure that no realpath assumptions are made about .gnu_debuglink paths.
# Copy inputs to some other location with arbitrary names, with the original
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/stack-safety-analysis.ll b/llvm/test/Instrumentation/HWAddressSanitizer/stack-safety-analysis.ll
index dad5f8e2fc56..8610645a4ca7 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/stack-safety-analysis.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/stack-safety-analysis.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple=aarch64-unknown-linux-gnu -passes=hwasan -hwasan-instrument-with-calls -hwasan-use-stack-safety=1 -hwasan-generate-tags-with-calls -S < %s | FileCheck %s --check-prefixes=SAFETY,CHECK
+; RUN: opt -pass-remarks-output=%t.pass-remarks -mtriple=aarch64-unknown-linux-gnu -passes=hwasan -hwasan-instrument-with-calls -hwasan-use-stack-safety=1 -hwasan-generate-tags-with-calls -S < %s | FileCheck %s --check-prefixes=SAFETY,CHECK
+; RUN: cat %t.pass-remarks | FileCheck %s --check-prefixes=SAFETY-REMARKS
; RUN: opt -mtriple=aarch64-unknown-linux-gnu -passes=hwasan -hwasan-instrument-with-calls -hwasan-use-stack-safety=0 -hwasan-generate-tags-with-calls -S < %s | FileCheck %s --check-prefixes=NOSAFETY,CHECK
; RUN: opt -mtriple=aarch64-unknown-linux-gnu -passes=hwasan -hwasan-instrument-with-calls -hwasan-generate-tags-with-calls -S < %s | FileCheck %s --check-prefixes=SAFETY,CHECK
; RUN: opt -mtriple=aarch64-unknown-linux-gnu -passes=hwasan -hwasan-instrument-stack=0 -hwasan-instrument-with-calls -hwasan-generate-tags-with-calls -S < %s | FileCheck %s --check-prefixes=NOSTACK,CHECK
@@ -20,6 +21,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_simple
%buf.sroa.0 = alloca i8, align 4
call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %buf.sroa.0)
store volatile i8 0, ptr %buf.sroa.0, align 4, !tbaa !8
@@ -37,6 +39,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_cmpxchg
%buf.sroa.0 = alloca i8, align 4
call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %buf.sroa.0)
%0 = cmpxchg ptr %buf.sroa.0, i8 1, i8 2 monotonic monotonic, align 4
@@ -54,6 +57,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_atomicrwm
%buf.sroa.0 = alloca i8, align 4
call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %buf.sroa.0)
%0 = atomicrmw add ptr %buf.sroa.0, i8 1 monotonic, align 4
@@ -71,6 +75,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_use
%buf.sroa.0 = alloca i8, align 4
call void @use(ptr nonnull %buf.sroa.0)
call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %buf.sroa.0)
@@ -89,6 +94,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_in_range
%buf.sroa.0 = alloca [10 x i8], align 4
call void @llvm.lifetime.start.p0(i64 10, ptr nonnull %buf.sroa.0)
store volatile i8 0, ptr %buf.sroa.0, align 4, !tbaa !8
@@ -106,6 +112,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_in_range2
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
call void @llvm.lifetime.start.p0(i64 10, ptr nonnull %buf.sroa.0)
@@ -123,6 +130,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_memset
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_memset
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_in_range3
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
call void @llvm.memset.p0.i32(ptr %ptr, i8 0, i32 1, i1 true)
@@ -138,6 +146,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_memmove
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_memmove
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_in_range4
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
call void @llvm.memmove.p0.p0.i32(ptr %ptr, ptr %ptr, i32 1, i1 true)
@@ -153,6 +162,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_memmove
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_memmove
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_in_range5
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
%buf.sroa.1 = alloca [10 x i8], align 4
@@ -171,6 +181,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_out_of_range
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 10
call void @llvm.lifetime.start.p0(i64 10, ptr nonnull %buf.sroa.0)
@@ -188,6 +199,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_out_of_range2
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 10
call void @llvm.lifetime.start.p0(i64 10, ptr nonnull %buf.sroa.0)
@@ -205,6 +217,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_memset
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_memset
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_out_of_range3
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
call void @llvm.memset.p0.i32(ptr %ptr, i8 0, i32 2, i1 true)
@@ -220,6 +233,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_memmove
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_memmove
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_out_of_range4
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
call void @llvm.memmove.p0.p0.i32(ptr %ptr, ptr %ptr, i32 2, i1 true)
@@ -235,6 +249,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_memmove
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_memmove
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_out_of_range5
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
%buf.sroa.1 = alloca [10 x i8], align 4
@@ -256,6 +271,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_out_of_range6
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 10
call void @llvm.lifetime.start.p0(i64 10, ptr nonnull %buf.sroa.0)
@@ -275,6 +291,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_potentially_out_of_range
%buf.sroa.0 = alloca [10 x i8], align 4
%off = call i32 @getoffset()
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 %off
@@ -293,6 +310,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_memmove
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK: call {{.*}}__hwasan_memmove
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_potentially_out_of_range2
%buf.sroa.0 = alloca [10 x i8], align 4
%ptr = getelementptr [10 x i8], ptr %buf.sroa.0, i32 0, i32 9
call void @llvm.memmove.p0.p0.i32(ptr %ptr, ptr %a, i32 1, i1 true)
@@ -309,6 +327,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_unclear
%buf.sroa.0 = alloca i8, align 4
%ptr = call ptr @getptr(ptr %buf.sroa.0)
call void @llvm.lifetime.start.p0(i64 10, ptr nonnull %ptr)
@@ -326,6 +345,7 @@ entry:
; SAFETY: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Missed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_select
%x = call ptr @getptr(ptr %a)
%buf.sroa.0 = alloca i8, align 4
call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %buf.sroa.0)
@@ -346,6 +366,7 @@ entry:
; SAFETY-NOT: call {{.*}}__hwasan_store
; NOSTACK-NOT: call {{.*}}__hwasan_generate_tag
; NOSTACK-NOT: call {{.*}}__hwasan_store
+ ; SAFETY-REMARKS: --- !Passed{{[[:space:]]}}Pass: hwasan{{[[:space:]]}}Name: ignoreAccess{{[[:space:]]}}Function: test_retptr
%buf.sroa.0 = alloca i8, align 4
call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %buf.sroa.0)
%ptr = call ptr @retptr(ptr %buf.sroa.0)
diff --git a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s
index 58b784779ac7..d3f82a217a17 100644
--- a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s
@@ -1,5 +1,6 @@
// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1150 %s | FileCheck --check-prefix=GFX1150 %s
// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1151 %s | FileCheck --check-prefix=GFX1150 %s
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1152 %s | FileCheck --check-prefix=GFX1150 %s
//
// Subtargets allow src1 of VOP3 DPP instructions to be SGPR or inlinable
diff --git a/llvm/test/MC/RISCV/relocations.s b/llvm/test/MC/RISCV/relocations.s
index 3cad3d44a615..f5f6417487f2 100644
--- a/llvm/test/MC/RISCV/relocations.s
+++ b/llvm/test/MC/RISCV/relocations.s
@@ -185,7 +185,7 @@ auipc a0, %tlsdesc_hi(a_symbol)
lw a1, %tlsdesc_load_lo(.L5)(a0)
# RELOC: R_RISCV_TLSDESC_LOAD_LO12
-# INSTR: a1, %tlsdesc_load_lo(.L5)(a0)
+# INSTR: lw a1, %tlsdesc_load_lo(.L5)(a0)
# FIXUP: fixup A - offset: 0, value: %tlsdesc_load_lo(.L5), kind: fixup_riscv_tlsdesc_load_lo12
addi a0, a0, %tlsdesc_add_lo(.L5)
diff --git a/llvm/test/MC/WebAssembly/reloc-pic64.s b/llvm/test/MC/WebAssembly/reloc-pic64.s
index 0f2ebba2a2f3..4c5ec4f30666 100644
--- a/llvm/test/MC/WebAssembly/reloc-pic64.s
+++ b/llvm/test/MC/WebAssembly/reloc-pic64.s
@@ -93,6 +93,7 @@ hidden_func:
# CHECK-NEXT: Index: 0
# CHECK-NEXT: ElemType: FUNCREF
# CHECK-NEXT: Limits:
+# CHECK-NEXT: Flags: [ IS_64 ]
# CHECK-NEXT: Minimum: 0x1
# CHECK-NEXT: - Module: GOT.mem
# CHECK-NEXT: Field: default_data
@@ -109,7 +110,7 @@ hidden_func:
# CHECK-NEXT: - Type: ELEM
# CHECK-NEXT: Segments:
# CHECK-NEXT: Offset:
-# CHECK-NEXT: Opcode: I32_CONST
+# CHECK-NEXT: Opcode: I64_CONST
# CHECK-NEXT: Value: 1
# CHECK-NEXT: Functions: [ 5 ]
# CHECK-NEXT: - Type: DATACOUNT
diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
index 7f36795aa6a9..7512edd27491 100644
--- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
+++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
@@ -230,6 +230,10 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1151 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1151 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1151 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1151 %s
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1152/' %s | yaml2obj -o %t.o.AMDGCN_GFX1152
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1152 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1152 %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX1152 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1152 %s
+
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1200/' %s | yaml2obj -o %t.o.AMDGCN_GFX1200
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1200 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1200 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1200 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1200 %s
@@ -450,6 +454,9 @@
# ELF-AMDGCN-GFX1151: EF_AMDGPU_MACH_AMDGCN_GFX1151 (0x4A)
# YAML-AMDGCN-GFX1151: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1151 ]
+# ELF-AMDGCN-GFX1152: EF_AMDGPU_MACH_AMDGCN_GFX1152 (0x55)
+# YAML-AMDGCN-GFX1152: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1152 ]
+
# ELF-AMDGCN-GFX1200: EF_AMDGPU_MACH_AMDGCN_GFX1200 (0x48)
# YAML-AMDGCN-GFX1200: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1200 ]
diff --git a/llvm/test/Other/can-execute.txt b/llvm/test/Other/can-execute.txt
index 46791cb892a2..37626e7357b2 100644
--- a/llvm/test/Other/can-execute.txt
+++ b/llvm/test/Other/can-execute.txt
@@ -1,5 +1,4 @@
REQUIRES: can-execute
-REQUIRES: shell
This tests that we abstract two peculiarities of unix in can_execute:
diff --git a/llvm/test/Other/lit-unicode.txt b/llvm/test/Other/lit-unicode.txt
index 2f4000145168..b375fc505b73 100644
--- a/llvm/test/Other/lit-unicode.txt
+++ b/llvm/test/Other/lit-unicode.txt
@@ -1,5 +1,4 @@
FIXME: See if we can fix this in lit by using Unicode strings.
-REQUIRES: shell
RUN: echo "ようこそ" | FileCheck %s
CHECK: {{^}}ようこそ{{$}}
diff --git a/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll b/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll
new file mode 100644
index 000000000000..2f0b51c410a5
--- /dev/null
+++ b/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll
@@ -0,0 +1,443 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p constraint-elimination -S %s | FileCheck %s
+
+declare void @llvm.assume(i1)
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_known(ptr %s) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_known(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 [[IV]], 1235
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ult i32 %iv, 1235
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known_due_to_start_value(ptr %s) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known_due_to_start_value(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1235, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 [[IV]], 1235
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 1235, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ult i32 %iv, 1235
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_known_due_to_precond_on_start_value(ptr %s, i32 %start) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_known_due_to_precond_on_start_value(
+; CHECK-SAME: ptr [[S:%.*]], i32 [[START:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[PRE_C:%.*]] = icmp ule i32 [[START]], 1234
+; CHECK-NEXT: call void @llvm.assume(i1 [[PRE_C]])
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 [[IV]], 1235
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ %pre.c = icmp ule i32 %start, 1234
+ call void @llvm.assume(i1 %pre.c)
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ult i32 %iv, 1235
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known_due_to_precond_on_start_value(ptr %s, i32 %start) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known_due_to_precond_on_start_value(
+; CHECK-SAME: ptr [[S:%.*]], i32 [[START:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[PRE_C:%.*]] = icmp ule i32 [[START]], 1236
+; CHECK-NEXT: call void @llvm.assume(i1 [[PRE_C]])
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 [[IV]], 1236
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ %pre.c = icmp ule i32 %start, 1236
+ call void @llvm.assume(i1 %pre.c)
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ult i32 %iv, 1236
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known_due_to_missing_precond(ptr %s, i32 %start) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known_due_to_missing_precond(
+; CHECK-SAME: ptr [[S:%.*]], i32 [[START:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 [[IV]], 1236
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ult i32 %iv, 1236
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_eq_same_exit_with_out_loop_preds_const_compare_not_known(ptr %s, i1 %pre.c, i32 %x) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_exit_with_out_loop_preds_const_compare_not_known(
+; CHECK-SAME: ptr [[S:%.*]], i1 [[PRE_C:%.*]], i32 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 [[PRE_C]], label %[[LOOP_HEADER:.*]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[X]], %[[ENTRY]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[IV]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[U:%.*]] = icmp ult i32 [[P]], 1235
+; CHECK-NEXT: ret i1 [[U]]
+;
+entry:
+ br i1 %pre.c, label %loop.header, label %exit
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %p = phi i32 [ %x, %entry ], [ %iv, %loop.header ], [ %iv, %loop.latch ]
+ %u = icmp ult i32 %p, 1235
+ ret i1 %u
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_successors_swapped(ptr %s) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_successors_swapped(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[LOOP_LATCH]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[U:%.*]] = icmp ult i32 [[IV]], 1235
+; CHECK-NEXT: ret i1 [[U]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %loop.latch, label %exit
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %u = icmp ult i32 %iv, 1235
+ ret i1 %u
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known(ptr %s) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_const_compare_not_known(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[U:%.*]] = icmp ult i32 [[IV]], 1234
+; CHECK-NEXT: ret i1 [[U]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %u = icmp ult i32 %iv, 1234
+ ret i1 %u
+}
+
+define i1 @multi_exiting_loop_eq_same_unique_exit_var_compare_known(ptr %s, i32 %N) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_eq_same_unique_exit_var_compare_known(
+; CHECK-SAME: ptr [[S:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ule i32 [[IV]], [[N]]
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp eq i32 %iv, %N
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ule i32 %iv, %N
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_ne_same_unique_exit_const_compare_known(ptr %s) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_ne_same_unique_exit_const_compare_known(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp ne i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[LOOP_LATCH]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 [[IV]], 1235
+; CHECK-NEXT: ret i1 [[T]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp ne i32 %iv, 1234
+ br i1 %exitcond.not, label %loop.latch, label %exit
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %t = icmp ult i32 %iv, 1235
+ ret i1 %t
+}
+
+define i1 @multi_exiting_loop_ne_same_unique_exit_successors_swapped(ptr %s) {
+; CHECK-LABEL: define i1 @multi_exiting_loop_ne_same_unique_exit_successors_swapped(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp ne i32 [[IV]], 1234
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i32 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[LATCH_C:%.*]] = icmp ult i8 [[TMP0]], 10
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[LATCH_C]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[U:%.*]] = icmp ult i32 [[IV]], 1235
+; CHECK-NEXT: ret i1 [[U]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %exitcond.not = icmp ne i32 %iv, 1234
+ br i1 %exitcond.not, label %exit, label %loop.latch
+
+loop.latch:
+ %arrayidx = getelementptr inbounds i8, ptr %s, i32 %iv
+ %0 = load i8, ptr %arrayidx, align 1
+ %latch.c = icmp ult i8 %0, 10
+ %iv.next = add nuw nsw i32 %iv, 1
+ br i1 %latch.c, label %loop.header, label %exit
+
+exit:
+ %u = icmp ult i32 %iv, 1235
+ ret i1 %u
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
new file mode 100644
index 000000000000..f7e21cd586e6
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
@@ -0,0 +1,232 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s --check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s --check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+; Split variadic functions into two functions:
+; - one equivalent to the original, same symbol etc
+; - one implementing the contents of the original but taking a va_list
+; IR here is applicable to any target that uses a ptr for its va_list
+;
+; Defines a function with each linkage (in the order of the llvm documentation).
+; If split applies it does the same transform to each.
+; Whether split applies depends on whether the ABI is being changed or not - e.g. a weak
+; function is not normally useful to split as the contents cannot be called from elsewhere.
+; If the ABI is being rewritten then the function is still converted. Call sites tested elsewhere.
+
+; Note: update_test_checks.py does not emit check lines for declarations
+
+declare void @sink_valist(ptr)
+declare void @llvm.va_start(ptr)
+declare void @llvm.va_end(ptr)
+
+declare void @decl_simple(...)
+define void @defn_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: call void @defn_simple.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for private
+define private void @defn_private_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_private_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: call void @defn_private_simple.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_private_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for internal
+define internal void @defn_internal_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_internal_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: call void @defn_internal_simple.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_internal_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for available_externally
+define available_externally void @available_externally_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@available_externally_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@available_externally_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for linkonce
+define linkonce void @defn_linkonce_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_linkonce_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_linkonce_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for weak
+define weak void @defn_weak_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_weak_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_weak_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; common is not applicable to functions
+; appending is not applicable to functions
+
+declare extern_weak void @decl_extern_weak_simple(...)
+; no define for extern_weak
+
+; no declare for linkonce_odr
+define linkonce_odr void @defn_linkonce_odr_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_linkonce_odr_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_linkonce_odr_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for weak_odr
+define weak_odr void @defn_weak_odr_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_weak_odr_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_weak_odr_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+declare external void @decl_external_simple(...)
+define external void @defn_external_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_external_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: call void @defn_external_simple.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_external_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
new file mode 100644
index 000000000000..9a86540ba2d5
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
@@ -0,0 +1,214 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s --check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s --check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+; Examples are variadic functions that return the first or the second of an int and a double
+; Split the functions into an internal equivalent that takes a va_list and an ABI-preserving wrapper
+
+define i32 @variadic_int_double_get_firstz(...) {
+; OPT-LABEL: define {{[^@]+}}@variadic_int_double_get_firstz(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: %1 = call i32 @variadic_int_double_get_firstz.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret i32 %1
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_int_double_get_firstz(ptr %varargs) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; ABI-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; ABI-NEXT: store ptr %argp.next, ptr %va, align 4
+; ABI-NEXT: %0 = load i32, ptr %argp.cur, align 4
+; ABI-NEXT: ret i32 %0
+;
+entry:
+ %va = alloca ptr, align 4
+ call void @llvm.va_start.p0(ptr nonnull %va)
+ %argp.cur = load ptr, ptr %va, align 4
+ %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+ store ptr %argp.next, ptr %va, align 4
+ %0 = load i32, ptr %argp.cur, align 4
+ call void @llvm.va_end.p0(ptr %va)
+ ret i32 %0
+}
+
+; CHECK-LABEL: define i32 @variadic_int_double_get_firstz(...) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va_list = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; CHECK-NEXT: %0 = tail call i32 @variadic_int_double_get_firstz.valist(ptr %va_list)
+; CHECK-NEXT: ret i32 %0
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define internal i32 @variadic_int_double_get_firstz.valist(ptr noalias %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va = alloca ptr, align 4
+; CHECK-NEXT: store ptr %varargs, ptr %va, align 4
+; CHECK-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; CHECK-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; CHECK-NEXT: store ptr %argp.next, ptr %va, align 4
+; CHECK-NEXT: %0 = load i32, ptr %argp.cur, align 4
+; CHECK-NEXT: ret i32 %0
+; CHECK-NEXT: }
+
+define double @variadic_int_double_get_secondz(...) {
+; OPT-LABEL: define {{[^@]+}}@variadic_int_double_get_secondz(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: %1 = call double @variadic_int_double_get_secondz.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret double %1
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_int_double_get_secondz(ptr %varargs) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; ABI-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; ABI-NEXT: %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+; ABI-NEXT: store ptr %argp.next2, ptr %va, align 4
+; ABI-NEXT: %0 = load double, ptr %argp.next, align 4
+; ABI-NEXT: ret double %0
+;
+entry:
+ %va = alloca ptr, align 4
+ call void @llvm.va_start.p0(ptr nonnull %va)
+ %argp.cur = load ptr, ptr %va, align 4
+ %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+ %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+ store ptr %argp.next2, ptr %va, align 4
+ %0 = load double, ptr %argp.next, align 4
+ call void @llvm.va_end.p0(ptr %va)
+ ret double %0
+}
+
+; CHECK-LABEL: define double @variadic_int_double_get_secondz(...) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va_list = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; CHECK-NEXT: %0 = tail call double @variadic_int_double_get_secondz.valist(ptr %va_list)
+; CHECK-NEXT: ret double %0
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define internal double @variadic_int_double_get_secondz.valist(ptr noalias %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va = alloca ptr, align 4
+; CHECK-NEXT: store ptr %varargs, ptr %va, align 4
+; CHECK-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; CHECK-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; CHECK-NEXT: %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+; CHECK-NEXT: store ptr %argp.next2, ptr %va, align 4
+; CHECK-NEXT: %0 = load double, ptr %argp.next, align 4
+; CHECK-NEXT: ret double %0
+; CHECK-NEXT: }
+
+
+; CHECK-LABEL: @variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store double %y, ptr %1, align 4
+; CHECK-NEXT: %call = call i32 @variadic_int_double_get_firstz.valist(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %cmp.i = icmp eq i32 %call, %x
+; CHECK-NEXT: ret i1 %cmp.i
+; CHECK-NEXT: }
+
+define zeroext i1 @variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; OPT-LABEL: define {{[^@]+}}@variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; OPT-NEXT: store i32 %x, ptr %0, align 4
+; OPT-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; OPT-NEXT: store double %y, ptr %1, align 8
+; OPT-NEXT: %call = call i32 @variadic_int_double_get_firstz.valist(ptr %vararg_buffer)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %cmp.i = icmp eq i32 %call, %x
+; OPT-NEXT: ret i1 %cmp.i
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; ABI-NEXT: store i32 %x, ptr %0, align 4
+; ABI-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; ABI-NEXT: store double %y, ptr %1, align 8
+; ABI-NEXT: %call = call i32 @variadic_int_double_get_firstz(ptr %vararg_buffer)
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %cmp.i = icmp eq i32 %call, %x
+; ABI-NEXT: ret i1 %cmp.i
+;
+entry:
+ %call = call i32 (...) @variadic_int_double_get_firstz(i32 %x, double %y)
+ %cmp.i = icmp eq i32 %call, %x
+ ret i1 %cmp.i
+}
+
+; CHECK-LABEL: @variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store double %y, ptr %1, align 4
+; CHECK-NEXT: %call = call double @variadic_int_double_get_secondz.valist(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %cmp.i = fcmp oeq double %call, %y
+; CHECK-NEXT: ret i1 %cmp.i
+; CHECK-NEXT: }
+
+define zeroext i1 @variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; OPT-LABEL: define {{[^@]+}}@variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; OPT-NEXT: store i32 %x, ptr %0, align 4
+; OPT-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; OPT-NEXT: store double %y, ptr %1, align 8
+; OPT-NEXT: %call = call double @variadic_int_double_get_secondz.valist(ptr %vararg_buffer)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %cmp.i = fcmp oeq double %call, %y
+; OPT-NEXT: ret i1 %cmp.i
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; ABI-NEXT: store i32 %x, ptr %0, align 4
+; ABI-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; ABI-NEXT: store double %y, ptr %1, align 8
+; ABI-NEXT: %call = call double @variadic_int_double_get_secondz(ptr %vararg_buffer)
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %cmp.i = fcmp oeq double %call, %y
+; ABI-NEXT: ret i1 %cmp.i
+;
+entry:
+ %call = call double (...) @variadic_int_double_get_secondz(i32 %x, double %y)
+ %cmp.i = fcmp oeq double %call, %y
+ ret i1 %cmp.i
+}
+
+; Declaration unchanged
+; CHECK: declare void @variadic_without_callers(...)
+declare void @variadic_without_callers(...)
+
+declare void @llvm.va_start.p0(ptr)
+declare void @llvm.va_end.p0(ptr)
diff --git a/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll b/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll
new file mode 100644
index 000000000000..de04c7235ad1
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+declare void @vararg(...)
+@vararg_ptr = hidden global ptr @vararg, align 4
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+define hidden void @fptr_single_i32(i32 noundef %x) {
+; OPT-LABEL: @fptr_single_i32(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; OPT-NEXT: tail call void (...) [[TMP0]](i32 noundef [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @fptr_single_i32(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[FPTR_SINGLE_I32_VARARG:%.*]], align 16
+; ABI-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[FPTR_SINGLE_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void [[TMP0]](ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(i32 noundef %x)
+ ret void
+}
+
+define hidden void @fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) {
+; OPT-LABEL: @fptr_libcS(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; OPT-NEXT: tail call void (...) [[TMP0]](ptr noundef nonnull byval([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @fptr_libcS(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[FPTR_LIBCS_VARARG:%.*]], align 16
+; ABI-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[X:%.*]], i64 24, i1 false)
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[FPTR_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void [[TMP0]](ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(ptr noundef nonnull byval(%struct.libcS) align 8 %x)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/intrinsics.ll b/llvm/test/Transforms/ExpandVariadics/intrinsics.ll
new file mode 100644
index 000000000000..1782c9229574
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/intrinsics.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=CHECK,OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=CHECK,ABI
+; REQUIRES: webassembly-registered-target
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare void @valist(ptr noundef)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+
+define void @start_once(...) {
+; OPT-LABEL: @start_once(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[VA_START:%.*]] = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VA_START]])
+; OPT-NEXT: call void @llvm.va_start.p0(ptr [[VA_START]])
+; OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VA_START]], align 4
+; OPT-NEXT: call void @start_once.valist(ptr [[TMP0]])
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VA_START]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @start_once(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[S:%.*]] = alloca ptr, align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S]])
+; ABI-NEXT: store ptr [[VARARGS:%.*]], ptr [[S]], align 4
+; ABI-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S]], align 4
+; ABI-NEXT: call void @valist(ptr noundef [[TMP0]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S]])
+; ABI-NEXT: ret void
+;
+entry:
+ %s = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s)
+ call void @llvm.va_start.p0(ptr nonnull %s)
+ %0 = load ptr, ptr %s, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s)
+ ret void
+}
+
+
+define void @start_twice(...) {
+; OPT-LABEL: @start_twice(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[VA_START:%.*]] = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VA_START]])
+; OPT-NEXT: call void @llvm.va_start.p0(ptr [[VA_START]])
+; OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VA_START]], align 4
+; OPT-NEXT: call void @start_twice.valist(ptr [[TMP0]])
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VA_START]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @start_twice(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[S0:%.*]] = alloca ptr, align 4
+; ABI-NEXT: [[S1:%.*]] = alloca ptr, align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S0]])
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S1]])
+; ABI-NEXT: store ptr [[VARARGS:%.*]], ptr [[S0]], align 4
+; ABI-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S0]], align 4
+; ABI-NEXT: call void @valist(ptr noundef [[TMP0]])
+; ABI-NEXT: store ptr [[VARARGS]], ptr [[S1]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S1]], align 4
+; ABI-NEXT: call void @valist(ptr noundef [[TMP1]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S1]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S0]])
+; ABI-NEXT: ret void
+;
+entry:
+ %s0 = alloca ptr, align 4
+ %s1 = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.va_start.p0(ptr nonnull %s0)
+ %0 = load ptr, ptr %s0, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s0)
+ call void @llvm.va_start.p0(ptr nonnull %s1)
+ %1 = load ptr, ptr %s1, align 4
+ call void @valist(ptr noundef %1)
+ call void @llvm.va_end.p0(ptr %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0)
+ ret void
+}
+
+define void @copy(ptr noundef %va) {
+; CHECK-LABEL: @copy(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VA_ADDR:%.*]] = alloca ptr, align 4
+; CHECK-NEXT: [[CP:%.*]] = alloca ptr, align 4
+; CHECK-NEXT: store ptr [[VA:%.*]], ptr [[VA_ADDR]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[CP]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[CP]], ptr [[VA_ADDR]], i32 4, i1 false)
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CP]], align 4
+; CHECK-NEXT: call void @valist(ptr noundef [[TMP0]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[CP]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %va.addr = alloca ptr, align 4
+ %cp = alloca ptr, align 4
+ store ptr %va, ptr %va.addr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp)
+ call void @llvm.va_copy.p0(ptr nonnull %cp, ptr nonnull %va.addr)
+ %0 = load ptr, ptr %cp, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/invoke.ll b/llvm/test/Transforms/ExpandVariadics/invoke.ll
new file mode 100644
index 000000000000..ced2edf9274f
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/invoke.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: not --crash opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s 2>&1 | FileCheck %s -check-prefixes=ERROR
+; REQUIRES: webassembly-registered-target
+target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
+
+; ERROR: LLVM ERROR: Cannot lower callbase instruction
+
+@_ZTIi = external constant ptr
+
+; Function Attrs: mustprogress
+define hidden void @test0(i32 noundef %x) #0 personality ptr @__gxx_wasm_personality_v0 {
+; CHECK-LABEL: @test0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: invoke void (...) @may_throw(i32 noundef [[X:%.*]])
+; CHECK-NEXT: to label [[TRY_CONT:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
+; CHECK: catch.dispatch:
+; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.start] unwind to caller
+; CHECK: catch.start:
+; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr @_ZTIi]
+; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @llvm.wasm.get.exception(token [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.wasm.get.ehselector(token [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.eh.typeid.for.p0(ptr nonnull @_ZTIi)
+; CHECK-NEXT: [[MATCHES:%.*]] = icmp eq i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: br i1 [[MATCHES]], label [[CATCH:%.*]], label [[RETHROW:%.*]]
+; CHECK: catch:
+; CHECK-NEXT: [[TMP5:%.*]] = call ptr @__cxa_begin_catch(ptr [[TMP2]]) [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: call void (...) @dont_throw(i32 noundef [[X]]) [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: call void @__cxa_end_catch() [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: catchret from [[TMP1]] to label [[TRY_CONT]]
+; CHECK: rethrow:
+; CHECK-NEXT: call void @llvm.wasm.rethrow() [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: unreachable
+; CHECK: try.cont:
+; CHECK-NEXT: ret void
+;
+entry:
+ invoke void (...) @may_throw(i32 noundef %x)
+ to label %try.cont unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %0 = catchswitch within none [label %catch.start] unwind to caller
+
+catch.start: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [ptr @_ZTIi]
+ %2 = tail call ptr @llvm.wasm.get.exception(token %1)
+ %3 = tail call i32 @llvm.wasm.get.ehselector(token %1)
+ %4 = tail call i32 @llvm.eh.typeid.for.p0(ptr nonnull @_ZTIi)
+ %matches = icmp eq i32 %3, %4
+ br i1 %matches, label %catch, label %rethrow
+
+catch: ; preds = %catch.start
+ %5 = call ptr @__cxa_begin_catch(ptr %2) #6 [ "funclet"(token %1) ]
+ call void (...) @dont_throw(i32 noundef %x) #6 [ "funclet"(token %1) ]
+ call void @__cxa_end_catch() #6 [ "funclet"(token %1) ]
+ catchret from %1 to label %try.cont
+
+rethrow: ; preds = %catch.start
+ call void @llvm.wasm.rethrow() #5 [ "funclet"(token %1) ]
+ unreachable
+
+try.cont: ; preds = %entry, %catch
+ ret void
+}
+
+declare void @may_throw(...)
+
+declare i32 @__gxx_wasm_personality_v0(...)
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
+declare ptr @llvm.wasm.get.exception(token)
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
+declare i32 @llvm.wasm.get.ehselector(token)
+
+; Function Attrs: nofree nosync nounwind memory(none)
+declare i32 @llvm.eh.typeid.for.p0(ptr)
+
+declare ptr @__cxa_begin_catch(ptr)
+
+; Function Attrs: nounwind
+declare void @dont_throw(...)
+
+declare void @__cxa_end_catch()
+
+; Function Attrs: noreturn
+declare void @llvm.wasm.rethrow()
+
+
diff --git a/llvm/test/Transforms/ExpandVariadics/pass-byval-byref.ll b/llvm/test/Transforms/ExpandVariadics/pass-byval-byref.ll
new file mode 100644
index 000000000000..85fefda36a76
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/pass-byval-byref.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+; CHECK: @sink
+declare void @sink(...)
+
+
+define void @pass_byval(ptr byval(i32) %b) {
+; OPT-LABEL: @pass_byval(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(ptr byval(i32) [[B:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_byval(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_BYVAL_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP0]], ptr [[B:%.*]], i64 4, i1 false)
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(ptr byval(i32) %b)
+ ret void
+}
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+define void @i32_libcS_byval(i32 %x, ptr noundef byval(%struct.libcS) align 8 %y) {
+; OPT-LABEL: @i32_libcS_byval(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]], ptr byval([[STRUCT_LIBCS:%.*]]) align 8 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @i32_libcS_byval(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[I32_LIBCS_BYVAL_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[Y:%.*]], i64 24, i1 false)
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[I32_LIBCS_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[I32_LIBCS_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %x, ptr byval(%struct.libcS) align 8 %y)
+ ret void
+}
+
+define void @libcS_i32_byval(ptr byval(%struct.libcS) align 8 %x, i32 %y) {
+; OPT-LABEL: @libcS_i32_byval(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(ptr byval([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]], i32 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @libcS_i32_byval(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[LIBCS_I32_BYVAL_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[X:%.*]], i64 24, i1 false)
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[LIBCS_I32_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[LIBCS_I32_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i32 [[Y:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(ptr byval(%struct.libcS) align 8 %x, i32 %y)
+ ret void
+}
+
+
+define void @pass_byref(ptr byref(i32) %b) {
+; OPT-LABEL: @pass_byref(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(ptr byref(i32) [[B:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_byref(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_BYREF_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[B:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(ptr byref(i32) %b)
+ ret void
+}
+
+define void @i32_libcS_byref(i32 %x, ptr noundef byref(%struct.libcS) align 8 %y) {
+; OPT-LABEL: @i32_libcS_byref(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]], ptr byref([[STRUCT_LIBCS:%.*]]) align 8 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @i32_libcS_byref(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[I32_LIBCS_BYREF_VARARG:%.*]], align 16
+; ABI-NEXT: store ptr [[Y:%.*]], ptr [[INDIRECTALLOCA]], align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[I32_LIBCS_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[I32_LIBCS_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %x, ptr byref(%struct.libcS) align 8 %y)
+ ret void
+}
+
+define void @libcS_i32_byref(ptr byref(%struct.libcS) align 8 %x, i32 %y) {
+; OPT-LABEL: @libcS_i32_byref(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(ptr byref([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]], i32 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @libcS_i32_byref(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[LIBCS_I32_BYREF_VARARG:%.*]], align 16
+; ABI-NEXT: store ptr [[X:%.*]], ptr [[INDIRECTALLOCA]], align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[LIBCS_I32_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[LIBCS_I32_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i32 [[Y:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(ptr byref(%struct.libcS) align 8 %x, i32 %y)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/pass-indirect.ll b/llvm/test/Transforms/ExpandVariadics/pass-indirect.ll
new file mode 100644
index 000000000000..8dcbb86d02d6
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/pass-indirect.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+; CHECK: @sink
+declare void @sink(...)
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+define void @i32_libcS(i32 %x, %struct.libcS %y) {
+; OPT-LABEL: @i32_libcS(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]], [[STRUCT_LIBCS:%.*]] [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @i32_libcS(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[I32_LIBCS_VARARG:%.*]], align 16
+; ABI-NEXT: store [[STRUCT_LIBCS]] [[Y:%.*]], ptr [[INDIRECTALLOCA]], align 8
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[I32_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[I32_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %x, %struct.libcS %y)
+ ret void
+}
+
+define void @libcS_i32(%struct.libcS %x, i32 %y) {
+; OPT-LABEL: @libcS_i32(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink([[STRUCT_LIBCS:%.*]] [[X:%.*]], i32 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @libcS_i32(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[LIBCS_I32_VARARG:%.*]], align 16
+; ABI-NEXT: store [[STRUCT_LIBCS]] [[X:%.*]], ptr [[INDIRECTALLOCA]], align 8
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[LIBCS_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[LIBCS_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i32 [[Y:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(%struct.libcS %x, i32 %y)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/pass-integers.ll b/llvm/test/Transforms/ExpandVariadics/pass-integers.ll
new file mode 100644
index 000000000000..a1cb6811800c
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/pass-integers.ll
@@ -0,0 +1,345 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+; Wasm passes struct {char} as an i8 so can check the varargs passing works on integers smaller than the slot size
+
+declare void @sink(...)
+
+
+define void @pass_nothing() {
+; OPT-LABEL: @pass_nothing(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink()
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_nothing(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_NOTHING_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink()
+ ret void
+}
+
+define void @pass_s1(i8 %x) {
+; OPT-LABEL: @pass_s1(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i8 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s1(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S1_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i8 [[X:%.*]], ptr [[TMP0]], align 1
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i8 %x)
+ ret void
+}
+
+define void @pass_s2(i16 %x) {
+; OPT-LABEL: @pass_s2(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i16 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s2(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S2_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i16 [[X:%.*]], ptr [[TMP0]], align 2
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i16 %x)
+ ret void
+}
+
+define void @pass_s3(i32 %x) {
+; OPT-LABEL: @pass_s3(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s3(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S3_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %x)
+ ret void
+}
+
+define void @pass_s4(i64 %x) {
+; OPT-LABEL: @pass_s4(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i64 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s4(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S4_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i64 [[X:%.*]], ptr [[TMP0]], align 8
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i64 %x)
+ ret void
+}
+
+define void @pass_s5(<4 x i32> %x) {
+; OPT-LABEL: @pass_s5(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(<4 x i32> [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s5(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S5_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store <4 x i32> [[X:%.*]], ptr [[TMP0]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(<4 x i32> %x)
+ ret void
+}
+
+define void @pass_int_s1(i32 %i, i8 %x) {
+; OPT-LABEL: @pass_int_s1(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i8 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s1(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S1_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 5, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i8 [[X:%.*]], ptr [[TMP1]], align 1
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 5, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i8 %x)
+ ret void
+}
+
+define void @pass_int_s2(i32 %i, i16 %x) {
+; OPT-LABEL: @pass_int_s2(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i16 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s2(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S2_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 6, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i16 [[X:%.*]], ptr [[TMP1]], align 2
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 6, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i16 %x)
+ ret void
+}
+
+define void @pass_int_s3(i32 %i, i32 %x) {
+; OPT-LABEL: @pass_int_s3(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i32 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s3(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S3_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i32 %x)
+ ret void
+}
+
+define void @pass_int_s4(i32 %i, i64 %x) {
+; OPT-LABEL: @pass_int_s4(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i64 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s4(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S4_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store i64 [[X:%.*]], ptr [[TMP1]], align 8
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i64 %x)
+ ret void
+}
+
+define void @pass_int_s5(i32 %i, <4 x i32> %x) {
+; OPT-LABEL: @pass_int_s5(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], <4 x i32> [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s5(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S5_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store <4 x i32> [[X:%.*]], ptr [[TMP1]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, <4 x i32> %x)
+ ret void
+}
+
+define void @pass_asc(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5) {
+; OPT-LABEL: @pass_asc(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i8 [[X1:%.*]], i16 [[X2:%.*]], i32 [[X3:%.*]], i64 [[X4:%.*]], <4 x i32> [[X5:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_asc(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_ASC_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 48, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i8 [[X1:%.*]], ptr [[TMP0]], align 1
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store i16 [[X2:%.*]], ptr [[TMP1]], align 2
+; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 4
+; ABI-NEXT: store i32 [[X3:%.*]], ptr [[TMP2]], align 4
+; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 6
+; ABI-NEXT: store i64 [[X4:%.*]], ptr [[TMP3]], align 8
+; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 8
+; ABI-NEXT: store <4 x i32> [[X5:%.*]], ptr [[TMP4]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 48, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5)
+ ret void
+}
+
+define void @pass_dsc(<4 x i32> %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4) {
+; OPT-LABEL: @pass_dsc(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(<4 x i32> [[X0:%.*]], i64 [[X1:%.*]], i32 [[X2:%.*]], i16 [[X3:%.*]], i8 [[X4:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_dsc(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_DSC_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 33, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store <4 x i32> [[X0:%.*]], ptr [[TMP0]], align 16
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i64 [[X1:%.*]], ptr [[TMP1]], align 8
+; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store i32 [[X2:%.*]], ptr [[TMP2]], align 4
+; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 3
+; ABI-NEXT: store i16 [[X3:%.*]], ptr [[TMP3]], align 2
+; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 5
+; ABI-NEXT: store i8 [[X4:%.*]], ptr [[TMP4]], align 1
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 33, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(<4 x i32> %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4)
+ ret void
+}
+
+define void @pass_multiple(i32 %i, i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5) {
+; OPT-LABEL: @pass_multiple(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i16 [[X2:%.*]], i64 [[X4:%.*]])
+; OPT-NEXT: tail call void (...) @sink(i32 [[I]], i8 [[X1:%.*]], i32 [[X3:%.*]], <4 x i32> [[X5:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_multiple(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_MULTIPLE_VARARG:%.*]], align 16
+; ABI-NEXT: [[VARARG_BUFFER1:%.*]] = alloca [[PASS_MULTIPLE_VARARG_0:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i16 [[X2:%.*]], ptr [[TMP1]], align 2
+; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 3
+; ABI-NEXT: store i64 [[X4:%.*]], ptr [[TMP2]], align 8
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[VARARG_BUFFER1]])
+; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I]], ptr [[TMP3]], align 4
+; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 1
+; ABI-NEXT: store i8 [[X1:%.*]], ptr [[TMP4]], align 1
+; ABI-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 3
+; ABI-NEXT: store i32 [[X3:%.*]], ptr [[TMP5]], align 4
+; ABI-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 5
+; ABI-NEXT: store <4 x i32> [[X5:%.*]], ptr [[TMP6]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER1]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[VARARG_BUFFER1]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i16 %x2, i64 %x4)
+ tail call void (...) @sink(i32 %i, i8 %x1, i32 %x3, <4 x i32> %x5)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/abs-1.ll b/llvm/test/Transforms/InstCombine/abs-1.ll
index 32bd7a37053e..0cf7cd97d8ff 100644
--- a/llvm/test/Transforms/InstCombine/abs-1.ll
+++ b/llvm/test/Transforms/InstCombine/abs-1.ll
@@ -852,11 +852,8 @@ define i8 @abs_diff_signed_sgt_nuw_extra_use3(i8 %a, i8 %b) {
define i32 @abs_diff_signed_slt_swap_wrong_pred1(i32 %a, i32 %b) {
; CHECK-LABEL: @abs_diff_signed_slt_swap_wrong_pred1(
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[SUB_BA:%.*]] = sub nsw i32 [[B]], [[A]]
-; CHECK-NEXT: [[SUB_AB:%.*]] = sub nsw i32 [[A]], [[B]]
-; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB_BA]], i32 [[SUB_AB]]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[SUB_AB:%.*]] = sub nsw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[SUB_AB]]
;
%cmp = icmp eq i32 %a, %b
%sub_ba = sub nsw i32 %b, %a
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
index ae503bfb1cfe..e103fe944098 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
@@ -98,8 +98,7 @@ declare i8 @gen8()
define i1 @c0() {
; CHECK-LABEL: @c0(
; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
-; CHECK-NEXT: [[RET:%.*]] = icmp sge i8 [[X]], [[TMP0]]
+; CHECK-NEXT: [[RET:%.*]] = icmp sgt i8 [[X]], -1
; CHECK-NEXT: ret i1 [[RET]]
;
%x = call i8 @gen8()
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
index d1dd411ee86b..bbd733e86a32 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
@@ -125,8 +125,7 @@ define i1 @oneuse0() {
define i1 @c0(i8 %x) {
; CHECK-LABEL: @c0(
-; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
-; CHECK-NEXT: [[RET:%.*]] = icmp sgt i8 [[TMP0]], [[X]]
+; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%tmp0 = and i8 %x, 3
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
index 4bed21a525f0..b167c8ad25aa 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
@@ -113,8 +113,7 @@ define i1 @oneuse0() {
define i1 @c0(i8 %x) {
; CHECK-LABEL: @c0(
-; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
-; CHECK-NEXT: [[RET:%.*]] = icmp sle i8 [[TMP0]], [[X]]
+; CHECK-NEXT: [[RET:%.*]] = icmp sgt i8 [[X:%.*]], -1
; CHECK-NEXT: ret i1 [[RET]]
;
%tmp0 = and i8 %x, 3
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
index 8415204a4915..828150244773 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
@@ -108,8 +108,7 @@ declare i8 @gen8()
define i1 @c0() {
; CHECK-LABEL: @c0(
; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
-; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[X]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%x = call i8 @gen8()
diff --git a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
index 8bb7fd0e522c..0aace5f52c96 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
@@ -7,8 +7,8 @@ define i1 @src_is_mask_zext(i16 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_zext(
; CHECK-NEXT: [[M_IN:%.*]] = lshr i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = zext i8 [[M_IN]] to i16
-; CHECK-NEXT: [[X:%.*]] = xor i16 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ule i16 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i16 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ule i16 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i16 %x_in, 123
@@ -83,8 +83,8 @@ define i1 @src_is_mask_and(i8 %x_in, i8 %y, i8 %z) {
; CHECK-NEXT: [[MY:%.*]] = lshr i8 7, [[Y:%.*]]
; CHECK-NEXT: [[MZ:%.*]] = lshr i8 -1, [[Z:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = and i8 [[MY]], [[MZ]]
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -121,8 +121,8 @@ define i1 @src_is_mask_or(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_or(
; CHECK-NEXT: [[MY:%.*]] = lshr i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = and i8 [[MY]], 7
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -138,8 +138,8 @@ define i1 @src_is_mask_xor(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_xor(
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[MASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -173,8 +173,8 @@ define i1 @src_is_mask_select(i8 %x_in, i8 %y, i1 %cond) {
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[MASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -249,8 +249,8 @@ define i1 @src_is_mask_lshr(i8 %x_in, i8 %y, i8 %z, i1 %cond) {
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[SMASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
; CHECK-NEXT: [[MASK:%.*]] = lshr i8 [[SMASK]], [[Z:%.*]]
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -269,8 +269,8 @@ define i1 @src_is_mask_ashr(i8 %x_in, i8 %y, i8 %z, i1 %cond) {
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[SMASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
; CHECK-NEXT: [[MASK:%.*]] = ashr i8 [[SMASK]], [[Z:%.*]]
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -287,8 +287,8 @@ define i1 @src_is_mask_p2_m1(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_p2_m1(
; CHECK-NEXT: [[P2ORZ:%.*]] = shl i8 2, [[Y:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = add i8 [[P2ORZ]], -1
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -304,8 +304,8 @@ define i1 @src_is_mask_umax(i8 %x_in, i8 %y) {
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.umax.i8(i8 [[YMASK]], i8 3)
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -324,8 +324,8 @@ define i1 @src_is_mask_umin(i8 %x_in, i8 %y, i8 %z) {
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[ZMASK:%.*]] = lshr i8 15, [[Z:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.umin.i8(i8 [[YMASK]], i8 [[ZMASK]])
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -364,8 +364,8 @@ define i1 @src_is_mask_smax(i8 %x_in, i8 %y) {
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.smax.i8(i8 [[YMASK]], i8 -1)
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -383,8 +383,8 @@ define i1 @src_is_mask_smin(i8 %x_in, i8 %y) {
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.smin.i8(i8 [[YMASK]], i8 0)
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -401,8 +401,8 @@ define i1 @src_is_mask_bitreverse_not_mask(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_bitreverse_not_mask(
; CHECK-NEXT: [[NMASK:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[NMASK]])
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[TMP1]], [[MASK]]
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -455,9 +455,9 @@ define i1 @src_is_notmask_shl(i8 %x_in, i8 %y, i1 %cond) {
define i1 @src_is_notmask_x_xor_neg_x(i8 %x_in, i8 %y, i1 %cond) {
; CHECK-LABEL: @src_is_notmask_x_xor_neg_x(
; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[NEG_Y:%.*]] = add i8 [[Y:%.*]], -1
-; CHECK-NEXT: [[NOTMASK0:%.*]] = xor i8 [[NEG_Y]], [[Y]]
-; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i8 [[NOTMASK0]], i8 7
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i8 [[TMP2]], i8 7
; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[TMP3]]
; CHECK-NEXT: ret i1 [[R]]
;
@@ -473,9 +473,9 @@ define i1 @src_is_notmask_x_xor_neg_x(i8 %x_in, i8 %y, i1 %cond) {
define i1 @src_is_notmask_x_xor_neg_x_inv(i8 %x_in, i8 %y, i1 %cond) {
; CHECK-LABEL: @src_is_notmask_x_xor_neg_x_inv(
; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[NEG_Y:%.*]] = add i8 [[Y:%.*]], -1
-; CHECK-NEXT: [[NOTMASK0:%.*]] = xor i8 [[NEG_Y]], [[Y]]
-; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i8 [[NOTMASK0]], i8 7
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i8 [[TMP2]], i8 7
; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[TMP3]]
; CHECK-NEXT: ret i1 [[R]]
;
@@ -625,9 +625,7 @@ define i1 @src_is_notmask_xor_fail(i8 %x_in, i8 %y) {
define i1 @src_is_mask_const_slt(i8 %x_in) {
; CHECK-LABEL: @src_is_mask_const_slt(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], 7
-; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[X]], [[AND]]
+; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[X_IN:%.*]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -650,9 +648,7 @@ define i1 @src_is_mask_const_sgt(i8 %x_in) {
define i1 @src_is_mask_const_sle(i8 %x_in) {
; CHECK-LABEL: @src_is_mask_const_sle(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], 31
-; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[AND]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[X_IN:%.*]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
index 0f26be12c39c..75badabda01a 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
@@ -58,7 +58,7 @@ define i1 @icmp_sge_x_negy(i8 %x, i8 %y) {
; CHECK-NEXT: [[CY:%.*]] = icmp slt i8 [[Y:%.*]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CY]])
; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], [[Y]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sge i8 [[AND]], [[X]]
+; CHECK-NEXT: [[Z:%.*]] = icmp eq i8 [[AND]], [[X]]
; CHECK-NEXT: ret i1 [[Z]]
;
%cy = icmp slt i8 %y, 0
@@ -74,7 +74,7 @@ define i1 @icmp_slt_x_negy(i8 %x, i8 %y) {
; CHECK-NEXT: br i1 [[CY]], label [[NEGY:%.*]], label [[POSY:%.*]]
; CHECK: negy:
; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], [[Y]]
-; CHECK-NEXT: [[Z:%.*]] = icmp slt i8 [[AND]], [[X]]
+; CHECK-NEXT: [[Z:%.*]] = icmp ne i8 [[AND]], [[X]]
; CHECK-NEXT: ret i1 [[Z]]
; CHECK: posy:
; CHECK-NEXT: [[R:%.*]] = call i1 @barrier()
@@ -116,10 +116,7 @@ posy:
define i1 @icmp_sle_x_negy(i8 %x, i8 %yy) {
; CHECK-LABEL: @icmp_sle_x_negy(
-; CHECK-NEXT: [[Y:%.*]] = or i8 [[YY:%.*]], -128
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[Y]], [[X:%.*]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sle i8 [[AND]], [[X]]
-; CHECK-NEXT: ret i1 [[Z]]
+; CHECK-NEXT: ret i1 true
;
%y = or i8 %yy, 128
%and = and i8 %y, %x
@@ -129,10 +126,7 @@ define i1 @icmp_sle_x_negy(i8 %x, i8 %yy) {
define <2 x i1> @icmp_sgt_x_negy(<2 x i8> %x, <2 x i8> %yy) {
; CHECK-LABEL: @icmp_sgt_x_negy(
-; CHECK-NEXT: [[Y:%.*]] = or <2 x i8> [[YY:%.*]], <i8 -128, i8 -128>
-; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[Y]], [[X:%.*]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[AND]], [[X]]
-; CHECK-NEXT: ret <2 x i1> [[Z]]
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
;
%y = or <2 x i8> %yy, <i8 128, i8 128>
%and = and <2 x i8> %y, %x
@@ -155,9 +149,7 @@ define <2 x i1> @icmp_sgt_x_negy_fail_partial(<2 x i8> %x, <2 x i8> %yy) {
define <2 x i1> @icmp_sle_x_posy(<2 x i8> %x, <2 x i8> %yy) {
; CHECK-LABEL: @icmp_sle_x_posy(
-; CHECK-NEXT: [[Y:%.*]] = and <2 x i8> [[YY:%.*]], <i8 127, i8 127>
-; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[Y]], [[X:%.*]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sle <2 x i8> [[AND]], [[X]]
+; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
; CHECK-NEXT: ret <2 x i1> [[Z]]
;
%y = and <2 x i8> %yy, <i8 127, i8 127>
@@ -183,8 +175,7 @@ define i1 @icmp_sgt_x_posy(i8 %x, i8 %y) {
; CHECK-LABEL: @icmp_sgt_x_posy(
; CHECK-NEXT: [[CY:%.*]] = icmp sgt i8 [[Y:%.*]], -1
; CHECK-NEXT: call void @llvm.assume(i1 [[CY]])
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], [[Y]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sgt i8 [[AND]], [[X]]
+; CHECK-NEXT: [[Z:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: ret i1 [[Z]]
;
%cy = icmp sge i8 %y, 0
@@ -196,9 +187,7 @@ define i1 @icmp_sgt_x_posy(i8 %x, i8 %y) {
define <2 x i1> @icmp_sgt_negx_y(<2 x i8> %xx, <2 x i8> %y) {
; CHECK-LABEL: @icmp_sgt_negx_y(
-; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 -128, i8 -128>
-; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X]], [[Y:%.*]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[AND]], [[X]]
+; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
; CHECK-NEXT: ret <2 x i1> [[Z]]
;
%x = or <2 x i8> %xx, <i8 128, i8 128>
@@ -211,8 +200,7 @@ define i1 @icmp_sle_negx_y(i8 %x, i8 %y) {
; CHECK-LABEL: @icmp_sle_negx_y(
; CHECK-NEXT: [[CX:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CX]])
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[Y:%.*]]
-; CHECK-NEXT: [[Z:%.*]] = icmp sle i8 [[AND]], [[X]]
+; CHECK-NEXT: [[Z:%.*]] = icmp slt i8 [[Y:%.*]], 0
; CHECK-NEXT: ret i1 [[Z]]
;
%cx = icmp slt i8 %x, 0
@@ -239,9 +227,9 @@ define i1 @icmp_sle_negx_y_fail_maybe_zero(i8 %x, i8 %y) {
define i1 @icmp_eq_x_invertable_y_todo(i8 %x, i1 %y) {
; CHECK-LABEL: @icmp_eq_x_invertable_y_todo(
-; CHECK-NEXT: [[YY:%.*]] = select i1 [[Y:%.*]], i8 -8, i8 -25
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[YY]], [[X:%.*]]
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[Y:%.*]], i8 -8, i8 -25
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%yy = select i1 %y, i8 7, i8 24
@@ -252,8 +240,8 @@ define i1 @icmp_eq_x_invertable_y_todo(i8 %x, i1 %y) {
define i1 @icmp_eq_x_invertable_y(i8 %x, i8 %y) {
; CHECK-LABEL: @icmp_eq_x_invertable_y(
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[YY:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%yy = xor i8 %y, -1
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
index 26f53cb4807e..304874645d5d 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
@@ -373,3 +373,29 @@ define i1 @pr64610(ptr %b) {
%r = icmp ugt i32 %or, %s
ret i1 %r
}
+
+define i1 @icmp_eq_x_invertable_y2_todo(i8 %x, i1 %y, i8 %z) {
+; CHECK-LABEL: @icmp_eq_x_invertable_y2_todo(
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[Y:%.*]], i8 -8, i8 [[Z:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %zz = xor i8 %z, -1
+ %yy = select i1 %y, i8 7, i8 %zz
+ %or = or i8 %x, %yy
+ %r = icmp eq i8 %yy, %or
+ ret i1 %r
+}
+
+define i1 @icmp_eq_x_invertable_y2(i8 %x, i8 %y) {
+; CHECK-LABEL: @icmp_eq_x_invertable_y2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %yy = xor i8 %y, -1
+ %or = or i8 %x, %yy
+ %r = icmp eq i8 %yy, %or
+ ret i1 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index d3cf2af4be65..846ede45028e 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -667,7 +667,7 @@ define i1 @test39(i1 %cond, double %x) {
; CHECK-LABEL: @test39(
; CHECK-NEXT: ret i1 true
;
- %s = select i1 %cond, double %x, double 0x7FF0000000000000 ; RHS = +infty
+ %s = select i1 %cond, double %x, double 0x7FF0000000000000 ; RHS = +infty
%cmp = fcmp ule double %x, %s
ret i1 %cmp
}
@@ -1364,7 +1364,7 @@ define i32 @PR23757_ne(i32 %x, ptr %p) {
; CHECK-NEXT: ret i32 -2147483648
;
%cmp = icmp ne i32 %x, 2147483647
- store i1 %cmp, ptr %p ; thwart predicate canonicalization
+ store i1 %cmp, ptr %p ; thwart predicate canonicalization
%add = add nsw i32 %x, 1
%sel = select i1 %cmp, i32 -2147483648, i32 %add
ret i32 %sel
@@ -1378,7 +1378,7 @@ define i32 @PR23757_ne_swapped(i32 %x, ptr %p) {
; CHECK-NEXT: ret i32 [[ADD]]
;
%cmp = icmp ne i32 %x, 2147483647
- store i1 %cmp, ptr %p ; thwart predicate canonicalization
+ store i1 %cmp, ptr %p ; thwart predicate canonicalization
%add = add nsw i32 %x, 1
%sel = select i1 %cmp, i32 %add, i32 -2147483648
ret i32 %sel
@@ -2787,16 +2787,67 @@ define <2 x i8> @select_replacement_add_eq_vec_nonuniform(<2 x i8> %x, <2 x i8>
define <2 x i8> @select_replacement_add_eq_vec_poison(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @select_replacement_add_eq_vec_poison(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 1, i8 poison>
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 2, i8 poison>, <2 x i8> [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[SEL]]
+;
+ %cmp = icmp eq <2 x i8> %x, <i8 1, i8 poison>
+ %add = add <2 x i8> %x, <i8 1, i8 1>
+ %sel = select <2 x i1> %cmp, <2 x i8> %add, <2 x i8> %y
+ ret <2 x i8> %sel
+}
+
+define <2 x i8> @select_replacement_add_eq_vec_undef(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @select_replacement_add_eq_vec_undef(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 1, i8 undef>
; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], <i8 1, i8 1>
; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[ADD]], <2 x i8> [[Y:%.*]]
; CHECK-NEXT: ret <2 x i8> [[SEL]]
;
- %cmp = icmp eq <2 x i8> %x, <i8 1, i8 poison>
+ %cmp = icmp eq <2 x i8> %x, <i8 1, i8 undef>
%add = add <2 x i8> %x, <i8 1, i8 1>
%sel = select <2 x i1> %cmp, <2 x i8> %add, <2 x i8> %y
ret <2 x i8> %sel
}
+define <2 x i8> @select_replacement_add_eq_vec_undef_okay(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @select_replacement_add_eq_vec_undef_okay(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 2, i8 undef>, <2 x i8> [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[SEL]]
+;
+ %cmp = icmp eq <2 x i8> %x, <i8 1, i8 1>
+ %add = add <2 x i8> %x, <i8 1, i8 undef>
+ %sel = select <2 x i1> %cmp, <2 x i8> %add, <2 x i8> %y
+ ret <2 x i8> %sel
+}
+
+
+define <2 x i8> @select_replacement_add_eq_vec_undef_okay_todo(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @select_replacement_add_eq_vec_undef_okay_todo(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 1, i8 undef>
+; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], <i8 1, i8 undef>
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[ADD]], <2 x i8> [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[SEL]]
+;
+ %cmp = icmp eq <2 x i8> %x, <i8 1, i8 undef>
+ %add = add <2 x i8> %x, <i8 1, i8 undef>
+ %sel = select <2 x i1> %cmp, <2 x i8> %add, <2 x i8> %y
+ ret <2 x i8> %sel
+}
+
+define <2 x i8> @select_replacement_xor_eq_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_replacement_xor_eq_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> zeroinitializer, <2 x i8> [[Z:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[SEL]]
+;
+ %cmp = icmp eq <2 x i8> %x, %y
+ %add = xor <2 x i8> %x, %y
+ %sel = select <2 x i1> %cmp, <2 x i8> %add, <2 x i8> %z
+ ret <2 x i8> %sel
+}
+
+
define i8 @select_replacement_add_ne(i8 %x, i8 %y) {
; CHECK-LABEL: @select_replacement_add_ne(
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], 1
@@ -2835,12 +2886,25 @@ define i8 @select_replacement_sub_noundef(i8 %x, i8 noundef %y, i8 %z) {
ret i8 %sel
}
+define i8 @select_replacement_sub_noundef_but_may_be_poison(i8 %x, i8 noundef %yy, i8 %z) {
+; CHECK-LABEL: @select_replacement_sub_noundef_but_may_be_poison(
+; CHECK-NEXT: [[Y:%.*]] = shl nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 0, i8 [[Z:%.*]]
+; CHECK-NEXT: ret i8 [[SEL]]
+;
+ %y = shl nuw i8 %yy, 1
+ %cmp = icmp eq i8 %x, %y
+ %sub = sub i8 %x, %y
+ %sel = select i1 %cmp, i8 %sub, i8 %z
+ ret i8 %sel
+}
+
; TODO: The transform is also safe without noundef.
define i8 @select_replacement_sub(i8 %x, i8 %y, i8 %z) {
; CHECK-LABEL: @select_replacement_sub(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[Y]]
-; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[SUB]], i8 [[Z:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 0, i8 [[Z:%.*]]
; CHECK-NEXT: ret i8 [[SEL]]
;
%cmp = icmp eq i8 %x, %y
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll
index 323e242620e6..64e8a6be998e 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll
@@ -2,7 +2,7 @@
declare void @foo(i64)
-; Verify that redundant adds aren't inserted by LSR.
+; Verify that redundant adds or geps aren't inserted by LSR.
; CHECK-LABEL: @bar(
define void @bar(ptr %A) {
entry:
@@ -10,9 +10,11 @@ entry:
while.cond:
; CHECK-LABEL: while.cond:
-; CHECK: add i64 %lsr.iv, 1
; CHECK-NOT: add i64 %lsr.iv, 1
; CHECK-LABEL: land.rhs:
+; CHECK: getelementptr i8, ptr %lsr.iv, i64 -8
+; CHECK-NOT: getelementptr i8, ptr %lsr.iv, i64 -8
+; CHECK-NOT: add i64, %lsr.iv, 1
%indvars.iv28 = phi i64 [ %indvars.iv.next29, %land.rhs ], [ 50, %entry ]
%cmp = icmp sgt i64 %indvars.iv28, 0
br i1 %cmp, label %land.rhs, label %while.end
diff --git a/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll b/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll
new file mode 100644
index 000000000000..7fd4eb18f16e
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll
@@ -0,0 +1,562 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=loop-unroll -unroll-runtime -unroll-allow-partial -S | FileCheck %s
+
+declare void @f() convergent
+declare void @g()
+
+; Although this loop contains a convergent instruction, it should be
+; fully unrolled.
+define i32 @full_unroll() {
+; CHECK-LABEL: @full_unroll(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: br label [[L3:%.*]]
+; CHECK: l3:
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: br label [[A:%.*]]
+; CHECK: a:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_1:%.*]]
+; CHECK: a.1:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_2:%.*]]
+; CHECK: a.2:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ br label %l3
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %a ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 3
+ br label %a
+
+a:
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ br i1 %exitcond, label %exit, label %l3
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction, but it should be partially
+; unrolled. The unroll count is the largest power of 2 that divides the
+; multiple -- 4, in this case.
+define i32 @runtime_unroll(i32 %n) {
+; CHECK-LABEL: @runtime_unroll(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: [[LOOP_CTL:%.*]] = mul nsw i32 [[N:%.*]], 12
+; CHECK-NEXT: br label [[L3:%.*]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_3:%.*]], [[A_3:%.*]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: br label [[A:%.*]]
+; CHECK: a:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_1:%.*]]
+; CHECK: a.1:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_2:%.*]]
+; CHECK: a.2:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_3]]
+; CHECK: a.3:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: [[INC_3]] = add nsw i32 [[X_0]], 4
+; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[LOOP_CTL]]
+; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[EXIT:%.*]], label [[L3]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ %loop_ctl = mul nsw i32 %n, 12
+ br label %l3
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %a ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ br label %a
+
+a:
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %loop_ctl
+ br i1 %exitcond, label %exit, label %l3
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction, so its partial unroll
+; count must divide its trip multiple. This overrides its unroll
+; pragma -- we unroll exactly 8 times, even though 16 is requested.
+define i32 @pragma_unroll(i32 %n) {
+; CHECK-LABEL: @pragma_unroll(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: [[LOOP_CTL:%.*]] = mul nsw i32 [[N:%.*]], 24
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_7:%.*]], [[A_7:%.*]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: br label [[A:%.*]]
+; CHECK: a:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_1:%.*]]
+; CHECK: a.1:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_2:%.*]]
+; CHECK: a.2:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_3:%.*]]
+; CHECK: a.3:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_4:%.*]]
+; CHECK: a.4:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_5:%.*]]
+; CHECK: a.5:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_6:%.*]]
+; CHECK: a.6:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_7]]
+; CHECK: a.7:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: [[INC_7]] = add nsw i32 [[X_0]], 8
+; CHECK-NEXT: [[EXITCOND_7:%.*]] = icmp eq i32 [[INC_7]], [[LOOP_CTL]]
+; CHECK-NEXT: br i1 [[EXITCOND_7]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ %loop_ctl = mul nsw i32 %n, 24
+ br label %l3, !llvm.loop !0
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %a ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ br label %a
+
+a:
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %loop_ctl
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !0
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 divides trip count 4. The loop unroll should respect the pragma.
+define void @pragma_unroll_divisible_trip_count() {
+; CHECK-LABEL: @pragma_unroll_divisible_trip_count(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_1:%.*]], [[L3]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: [[INC_1]] = add nuw nsw i32 [[X_0]], 2
+; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 4
+; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 4
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret void
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 divides trip multiple 2. The loop unroll should respect the pragma.
+define i32 @pragma_unroll_divisible_trip_multiple(i32 %n) {
+; CHECK-LABEL: @pragma_unroll_divisible_trip_multiple(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: [[LOOP_CTL:%.*]] = mul nsw i32 [[N:%.*]], 2
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_1:%.*]], [[L3]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: [[INC_1]] = add nsw i32 [[X_0]], 2
+; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], [[LOOP_CTL]]
+; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ %loop_ctl = mul nsw i32 %n, 2
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %loop_ctl
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 is unknown to divide runtime trip count, the loop is not unrolled
+; since remainder is forbidden for unrolling convergent loop.
+define i32 @pragma_unroll_indivisible_runtime_trip_count(i32 %n) {
+; CHECK-LABEL: @pragma_unroll_indivisible_runtime_trip_count(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[L3]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: [[INC]] = add nsw i32 [[X_0]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP4]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 does not divide trip count 5, the loop is not unrolled by 2
+; since remainder is forbidden for unrolling convergent loop. Instead, the
+; loop gets fully unrolled.
+define i32 @pragma_unroll_indivisible_trip_count() {
+; CHECK-LABEL: @pragma_unroll_indivisible_trip_count(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l3:
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 5
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction that is anchored inside the loop
+; itself. It is unrolled by 2 with remainder, as requested by the loop metadata.
+define i32 @pragma_unroll_with_remainder(i32 %n) {
+; CHECK-LABEL: @pragma_unroll_with_remainder(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[N:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 1
+; CHECK-NEXT: br i1 [[TMP2]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK: entry.new:
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[TMP0]], [[XTRAITER]]
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[INC_1:%.*]], [[L3]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[L3]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: [[TOK_LOOP_1:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP_1]]) ]
+; CHECK-NEXT: [[INC_1]] = add nsw i32 [[X_0]], 2
+; CHECK-NEXT: [[NITER_NEXT_1]] = add i32 [[NITER]], 2
+; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[L3]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: exit.unr-lcssa.loopexit:
+; CHECK-NEXT: br label [[EXIT_UNR_LCSSA]]
+; CHECK: exit.unr-lcssa:
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[L3_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK: l3.epil.preheader:
+; CHECK-NEXT: br label [[L3_EPIL:%.*]]
+; CHECK: l3.epil:
+; CHECK-NEXT: [[TOK_LOOP_EPIL:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP_EPIL]]) ]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+ %tok.loop = call token @llvm.experimental.convergence.anchor()
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret i32 0
+}
+
+; Don't unroll a loop that is extended by convergence controls.
+;
+; We could theoretically duplicate the extension part, but this is not
+; implemented.
+define i32 @extended_loop(i32 %n) {
+; CHECK-LABEL: @extended_loop(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[L3]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: [[INC]] = add nsw i32 [[X_0]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP4]]
+; CHECK: exit:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+ %tok.loop = call token @llvm.experimental.convergence.anchor()
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ ret i32 0
+}
+
+; Inner loop is extended beyond the outer loop. No unrolling possible.
+
+define i32 @extended_inner_loop_1(i32 %n, i1 %cond) {
+; CHECK-LABEL: @extended_inner_loop_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[L3:%.*]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: [[INC]] = add nsw i32 [[X_0]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 4
+; CHECK-NEXT: br label [[L2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2:
+; CHECK-NEXT: [[TOK_L2:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[L2]], label [[LATCH]], !llvm.loop [[LOOP4]]
+; CHECK: latch:
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[L3]]
+; CHECK: exit:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %l3
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %latch ]
+ %tok.loop = call token @llvm.experimental.convergence.anchor()
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 4
+ br label %l2, !llvm.loop !1
+
+l2:
+ %tok.l2 = call token @llvm.experimental.convergence.anchor()
+ call void @f() [ "convergencectrl"(token %tok.l2) ]
+ br i1 %cond, label %l2, label %latch, !llvm.loop !1
+
+latch:
+ br i1 %exitcond, label %exit, label %l3
+
+exit:
+ call void @f() [ "convergencectrl"(token %tok.l2) ]
+ ret i32 0
+}
+
+; Inner loop is extended inside the outer loop. Outer loop is unrolled.
+
+define i32 @extended_inner_loop_2(i32 %n, i1 %cond) {
+; CHECK-LABEL: @extended_inner_loop_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[L3:%.*]]
+; CHECK: l3:
+; CHECK-NEXT: br label [[L2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2:
+; CHECK-NEXT: [[TOK_L2:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[L2]], label [[LATCH:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: latch:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
+; CHECK-NEXT: br label [[L2_1:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.1:
+; CHECK-NEXT: [[TOK_L2_1:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_1]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_1]], label [[LATCH_1:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: latch.1:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_1]]) ]
+; CHECK-NEXT: br label [[L2_2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.2:
+; CHECK-NEXT: [[TOK_L2_2:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_2]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_2]], label [[LATCH_2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: latch.2:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_2]]) ]
+; CHECK-NEXT: br label [[L2_3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.3:
+; CHECK-NEXT: [[TOK_L2_3:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_3]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_3]], label [[LATCH_3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: latch.3:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_3]]) ]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %l3
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %latch ]
+ %tok.loop = call token @llvm.experimental.convergence.anchor()
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 4
+ br label %l2, !llvm.loop !1
+
+l2:
+ %tok.l2 = call token @llvm.experimental.convergence.anchor()
+ call void @f() [ "convergencectrl"(token %tok.l2) ]
+ br i1 %cond, label %l2, label %latch, !llvm.loop !1
+
+latch:
+ call void @f() [ "convergencectrl"(token %tok.l2) ]
+ br i1 %exitcond, label %exit, label %l3
+
+exit:
+ ret i32 0
+}
+
+; No extension. Both loops unrolled.
+
+define i32 @unroll_nest(i32 %n, i1 %cond) {
+; CHECK-LABEL: @unroll_nest(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[L3:%.*]]
+; CHECK: l3:
+; CHECK-NEXT: br label [[L2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2:
+; CHECK-NEXT: [[TOK_L2:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[L2_1:%.*]], label [[LATCH:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.1:
+; CHECK-NEXT: [[TOK_L2_1:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_1]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2]], label [[LATCH]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: latch:
+; CHECK-NEXT: br label [[L2_12:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.12:
+; CHECK-NEXT: [[TOK_L2_11:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_11]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_1_1:%.*]], label [[LATCH_1:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.1.1:
+; CHECK-NEXT: [[TOK_L2_1_1:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_1_1]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_12]], label [[LATCH_1]], !llvm.loop [[LOOP9]]
+; CHECK: latch.1:
+; CHECK-NEXT: br label [[L2_2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.2:
+; CHECK-NEXT: [[TOK_L2_2:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_2]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_1_2:%.*]], label [[LATCH_2:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.1.2:
+; CHECK-NEXT: [[TOK_L2_1_2:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_1_2]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_2]], label [[LATCH_2]], !llvm.loop [[LOOP9]]
+; CHECK: latch.2:
+; CHECK-NEXT: br label [[L2_3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.3:
+; CHECK-NEXT: [[TOK_L2_3:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_3]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_1_3:%.*]], label [[LATCH_3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l2.1.3:
+; CHECK-NEXT: [[TOK_L2_1_3:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_L2_1_3]]) ]
+; CHECK-NEXT: br i1 [[COND]], label [[L2_3]], label [[LATCH_3]], !llvm.loop [[LOOP9]]
+; CHECK: latch.3:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %l3
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %latch ]
+ %tok.loop = call token @llvm.experimental.convergence.anchor()
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 4
+ br label %l2, !llvm.loop !1
+
+l2:
+ %tok.l2 = call token @llvm.experimental.convergence.anchor()
+ call void @f() [ "convergencectrl"(token %tok.l2) ]
+ br i1 %cond, label %l2, label %latch, !llvm.loop !1
+
+latch:
+ br i1 %exitcond, label %exit, label %l3
+
+exit:
+ ret i32 0
+}
+
+declare token @llvm.experimental.convergence.anchor()
+declare token @llvm.experimental.convergence.loop()
+
+!0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}}
+!1 = !{!1, !{!"llvm.loop.unroll.count", i32 2}}
diff --git a/llvm/test/tools/llvm-cov/gcov/intermediate-format.test b/llvm/test/tools/llvm-cov/gcov/intermediate-format.test
index 583e670c2d3f..a3f4695c3204 100644
--- a/llvm/test/tools/llvm-cov/gcov/intermediate-format.test
+++ b/llvm/test/tools/llvm-cov/gcov/intermediate-format.test
@@ -1,5 +1,3 @@
-REQUIRES: shell
-
RUN: rm -rf %t && mkdir %t && cd %t
RUN: cp %S/Inputs/test.gcno %S/Inputs/test.gcda .
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
index bd7a4894b45d..7150a586fe6f 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
@@ -1455,20 +1455,20 @@ vzeroupper
# CHECK-NEXT: 3 2 1.00 * vpextrq $1, %xmm0, (%rax)
# CHECK-NEXT: 2 3 1.00 vpextrw $1, %xmm0, %ecx
# CHECK-NEXT: 3 2 1.00 * vpextrw $1, %xmm0, (%rax)
-# CHECK-NEXT: 3 3 2.00 vphaddd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4 9 2.00 * vphaddd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 3 3 2.00 vphaddsw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4 9 2.00 * vphaddsw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 3 3 2.00 vphaddw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4 9 2.00 * vphaddw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.00 vphaddd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.00 * vphaddd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.00 vphaddsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.00 * vphaddsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.00 vphaddw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.00 * vphaddw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 1.00 vphminposuw %xmm0, %xmm2
# CHECK-NEXT: 2 10 1.00 * vphminposuw (%rax), %xmm2
-# CHECK-NEXT: 3 3 2.00 vphsubd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4 9 2.00 * vphsubd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 3 3 2.00 vphsubsw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4 9 2.00 * vphsubsw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 3 3 2.00 vphsubw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4 9 2.00 * vphsubw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.00 vphsubd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.00 * vphsubd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.00 vphsubsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.00 * vphsubsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 3 3 1.00 vphsubw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 9 1.00 * vphsubw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 2 2.00 vpinsrb $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 2 6 1.00 * vpinsrb $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 2 2.00 vpinsrd $1, %eax, %xmm1, %xmm2
@@ -1738,7 +1738,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - 126.00 322.92 237.92 160.50 160.50 19.00 291.92 6.25 19.00 19.00 19.00
+# CHECK-NEXT: - 126.00 325.58 252.58 160.50 160.50 19.00 274.58 6.25 19.00 19.00 19.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -1908,22 +1908,22 @@ vzeroupper
# CHECK-NEXT: - - - - - - 0.50 - - 0.50 0.50 0.50 vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - vextractps $1, %xmm0, %ecx
# CHECK-NEXT: - - - - - - 0.50 1.00 - 0.50 0.50 0.50 vextractps $1, %xmm0, (%rax)
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhaddpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhaddpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhaddpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhaddpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhaddps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhaddps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhaddps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhaddps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhsubpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhsubpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhsubpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhsubpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhsubps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhsubps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vhsubps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vhsubps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhaddpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhaddpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhaddpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhaddpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhaddps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhaddps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhsubpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhsubpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhsubps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vhsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vhsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - vinsertf128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vinsertf128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - vinsertps $1, %xmm0, %xmm1, %xmm2
@@ -2167,20 +2167,20 @@ vzeroupper
# CHECK-NEXT: - - - - - - 0.50 1.00 - 0.50 0.50 0.50 vpextrq $1, %xmm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - vpextrw $1, %xmm0, %ecx
# CHECK-NEXT: - - - - - - 0.50 1.00 - 0.50 0.50 0.50 vpextrw $1, %xmm0, (%rax)
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphaddd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphaddd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vphaddsw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vphaddsw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphaddw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphaddw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphaddd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphaddd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 1.50 - - - 1.00 - - - - vphaddsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 1.50 0.50 0.50 - 1.00 - - - - vphaddsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphaddw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphaddw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - - - vphminposuw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vphminposuw (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphsubd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphsubd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vphsubsw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vphsubsw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphsubw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphsubw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphsubd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphsubd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 1.50 - - - 1.00 - - - - vphsubsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 1.50 0.50 0.50 - 1.00 - - - - vphsubsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphsubw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphsubw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 2.00 - - - - vpinsrb $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpinsrb $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 2.00 - - - - vpinsrd $1, %eax, %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s
index dcf883445ba4..c251dc349587 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s
@@ -576,18 +576,18 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 5 20 2.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 5 18 1.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 5 20 2.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 4 10 2.00 * vphaddd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 4 10 2.00 * vphaddsw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 3 3 2.00 vphaddw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 4 10 2.00 * vphaddw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 3 3 2.00 vphsubd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 4 10 2.00 * vphsubd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 3 3 2.00 vphsubsw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 4 10 2.00 * vphsubsw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 3 3 2.00 vphsubw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 4 10 2.00 * vphsubw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 3 1.00 vphaddd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 10 1.00 * vphaddd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 3 1.00 vphaddsw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 10 1.00 * vphaddsw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 3 1.00 vphaddw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 10 1.00 * vphaddw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 3 1.00 vphsubd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 10 1.00 * vphsubd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 3 1.00 vphsubsw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 10 1.00 * vphsubsw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 3 1.00 vphsubw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 4 10 1.00 * vphsubw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vpmaddubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 12 0.50 * vpmaddubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vpmaddwd %ymm0, %ymm1, %ymm2
@@ -778,7 +778,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - - 110.33 104.33 98.00 98.00 2.50 149.33 - 2.50 2.50 2.50
+# CHECK-NEXT: - - 110.33 116.33 98.00 98.00 2.50 137.33 - 2.50 2.50 2.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -898,18 +898,18 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphaddd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphaddd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vphaddsw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vphaddsw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphaddw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphaddw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphsubd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphsubd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - vphsubsw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - vphsubsw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - vphsubw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - vphsubw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphaddd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphaddd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 1.50 - - - 1.00 - - - - vphaddsw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 1.50 0.50 0.50 - 1.00 - - - - vphaddsw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphaddw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphaddw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphsubd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphsubd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 1.50 - - - 1.00 - - - - vphsubsw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 1.50 0.50 0.50 - 1.00 - - - - vphsubsw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - vphsubw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - vphsubw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vpmaddubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vpmaddubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vpmaddwd %ymm0, %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s
index 4d1942450ec6..0d075a9bc3bf 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s
@@ -81,7 +81,7 @@ mwait
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - - 6.67 7.67 5.00 5.00 - 23.67 4.00 - - -
+# CHECK-NEXT: - - 8.00 9.00 5.00 5.00 - 21.00 4.00 - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -89,14 +89,14 @@ mwait
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - addsubpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addsubps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - addsubps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - haddpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - haddpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - haddps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - haddps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - hsubpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - hsubpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - hsubps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - hsubps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - haddpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - haddpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - haddps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - haddps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - hsubpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - hsubpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - hsubps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - hsubps (%rax), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - lddqu (%rax), %xmm2
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - - - monitor
# CHECK-NEXT: - - - - - - - 1.00 - - - - movddup %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-ssse3.s
index 3a6668cedb60..d034cbd0f639 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-ssse3.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-ssse3.s
@@ -124,28 +124,28 @@ psignw (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 * palignr $1, (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 phaddd %mm0, %mm2
# CHECK-NEXT: 4 8 2.00 * phaddd (%rax), %mm2
-# CHECK-NEXT: 3 3 2.00 phaddd %xmm0, %xmm2
-# CHECK-NEXT: 4 9 2.00 * phaddd (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.00 phaddd %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.00 * phaddd (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 phaddsw %mm0, %mm2
# CHECK-NEXT: 4 8 2.00 * phaddsw (%rax), %mm2
-# CHECK-NEXT: 3 3 2.00 phaddsw %xmm0, %xmm2
-# CHECK-NEXT: 4 9 2.00 * phaddsw (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.00 phaddsw %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.00 * phaddsw (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 phaddw %mm0, %mm2
# CHECK-NEXT: 4 8 2.00 * phaddw (%rax), %mm2
-# CHECK-NEXT: 3 3 2.00 phaddw %xmm0, %xmm2
-# CHECK-NEXT: 4 9 2.00 * phaddw (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.00 phaddw %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.00 * phaddw (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 phsubd %mm0, %mm2
# CHECK-NEXT: 4 8 2.00 * phsubd (%rax), %mm2
-# CHECK-NEXT: 3 3 2.00 phsubd %xmm0, %xmm2
-# CHECK-NEXT: 4 9 2.00 * phsubd (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.00 phsubd %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.00 * phsubd (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 phsubsw %mm0, %mm2
# CHECK-NEXT: 4 8 2.00 * phsubsw (%rax), %mm2
-# CHECK-NEXT: 3 3 2.00 phsubsw %xmm0, %xmm2
-# CHECK-NEXT: 4 9 2.00 * phsubsw (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.00 phsubsw %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.00 * phsubsw (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 phsubw %mm0, %mm2
# CHECK-NEXT: 4 8 2.00 * phsubw (%rax), %mm2
-# CHECK-NEXT: 3 3 2.00 phsubw %xmm0, %xmm2
-# CHECK-NEXT: 4 9 2.00 * phsubw (%rax), %xmm2
+# CHECK-NEXT: 3 3 1.00 phsubw %xmm0, %xmm2
+# CHECK-NEXT: 4 9 1.00 * phsubw (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 pmaddubsw %mm0, %mm2
# CHECK-NEXT: 2 10 1.00 * pmaddubsw (%rax), %mm2
# CHECK-NEXT: 1 5 0.50 pmaddubsw %xmm0, %xmm2
@@ -187,7 +187,7 @@ psignw (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - - 30.67 13.67 16.00 16.00 - 67.67 - - - -
+# CHECK-NEXT: - - 30.67 25.67 16.00 16.00 - 55.67 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -209,28 +209,28 @@ psignw (%rax), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - palignr $1, (%rax), %xmm2
# CHECK-NEXT: - - 0.50 - - - - 2.50 - - - - phaddd %mm0, %mm2
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 2.50 - - - - phaddd (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - phaddd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - phaddd (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - phaddd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - phaddd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 2.00 - - - - phaddsw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 2.00 - - - - phaddsw (%rax), %mm2
-# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - phaddsw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - phaddsw (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 1.50 - - - 1.00 - - - - phaddsw %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 1.50 0.50 0.50 - 1.00 - - - - phaddsw (%rax), %xmm2
# CHECK-NEXT: - - 0.50 - - - - 2.50 - - - - phaddw %mm0, %mm2
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 2.50 - - - - phaddw (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - phaddw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - phaddw (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - phaddw %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - phaddw (%rax), %xmm2
# CHECK-NEXT: - - 0.50 - - - - 2.50 - - - - phsubd %mm0, %mm2
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 2.50 - - - - phsubd (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - phsubd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - phsubd (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - phsubd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - phsubd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 2.00 - - - - phsubsw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 2.00 - - - - phsubsw (%rax), %mm2
-# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - - - phsubsw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - - - phsubsw (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 1.50 - - - 1.00 - - - - phsubsw %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 1.50 0.50 0.50 - 1.00 - - - - phsubsw (%rax), %xmm2
# CHECK-NEXT: - - 0.50 - - - - 2.50 - - - - phsubw %mm0, %mm2
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 2.50 - - - - phsubw (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - - - phsubw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - - - phsubw (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 1.33 - - - 1.33 - - - - phsubw %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 1.33 0.50 0.50 - 1.33 - - - - phsubw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - - - pmaddubsw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - pmaddubsw (%rax), %mm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - pmaddubsw %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
index cabb002b8241..f4904e4467ff 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 126.00 325.25 202.25 173.83 173.83 38.00 326.25 7.25 11.33
+# CHECK-NEXT: - 126.00 327.92 204.92 173.83 173.83 38.00 320.92 7.25 11.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1906,22 +1906,22 @@ vzeroupper
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vextractps $1, %xmm0, %ecx
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 vextractps $1, %xmm0, (%rax)
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhaddpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhaddpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhaddpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhaddpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhaddps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhaddps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhaddps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhaddps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhsubpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhsubpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhsubpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhsubpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhsubps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhsubps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vhsubps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vhsubps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhaddpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhaddpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhaddpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhaddpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhaddps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhaddps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhsubpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhsubpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhsubps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vhsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vhsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vinsertf128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinsertf128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vinsertps $1, %xmm0, %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s
index e6bec1953fb8..0b6b03508389 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s
@@ -79,7 +79,7 @@ mwait
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 6.67 6.67 5.00 5.00 - 24.67 4.00 -
+# CHECK-NEXT: - - 8.00 8.00 5.00 5.00 - 22.00 4.00 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -87,14 +87,14 @@ mwait
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsubpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsubps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsubps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - haddpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - haddpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - haddps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - haddps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - hsubpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - hsubpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - hsubps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - hsubps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - haddpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - haddpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - haddps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - haddps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - hsubpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - hsubpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - hsubps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - hsubps (%rax), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - lddqu (%rax), %xmm2
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - monitor
# CHECK-NEXT: - - - - - - - 1.00 - - movddup %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
index f1262c5a2fc2..f79f358e83a7 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
@@ -28,6 +28,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
; RUN: diff %t-specify.txt %t-detect.txt
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1152 -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1152 %t.o > %t-specify.txt
+; RUN: llvm-objdump -D %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1151 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
diff --git a/llvm/test/tools/llvm-rc/windres-prefix.test b/llvm/test/tools/llvm-rc/windres-prefix.test
index 4c53fdfc3db6..7dda51d06352 100644
--- a/llvm/test/tools/llvm-rc/windres-prefix.test
+++ b/llvm/test/tools/llvm-rc/windres-prefix.test
@@ -1,5 +1,3 @@
-; REQUIRES: shell
-
; RUN: rm -rf %t && mkdir %t
; Check that a triple prefix on the executable gets picked up as target triple.
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
index f9e90e21400e..50d437b50b69 100644
--- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
@@ -421,6 +421,15 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1151
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1151 -DFLAG_VALUE=0x4A
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1152
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1152 -DFLAG_VALUE=0x55
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1152
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1152 -DFLAG_VALUE=0x55
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1152
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1152 -DFLAG_VALUE=0x55
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1200
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1200 -DFLAG_VALUE=0x48
diff --git a/llvm/test/tools/llvm-reduce/remove-debug-info-nodes.ll b/llvm/test/tools/llvm-reduce/remove-debug-info-nodes.ll
index 1ceeca8b6561..127543c64c33 100644
--- a/llvm/test/tools/llvm-reduce/remove-debug-info-nodes.ll
+++ b/llvm/test/tools/llvm-reduce/remove-debug-info-nodes.ll
@@ -2,7 +2,7 @@
; DICompileUnit and DISuprogram.
;
; RUN: llvm-reduce --delta-passes=di-metadata --abort-on-invalid-reduction --test FileCheck --test-arg --check-prefixes=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t
-; RUN: FileCheck <%t --enable-var-scope %s
+; RUN: FileCheck <%t --enable-var-scope %s --implicit-check-not=DIGlobalVariableExpression
; CHECK-INTERESTINGNESS: define void @test() !dbg [[SUBPROG:![0-9]+]]
; CHECK-INTERESTINGNESS: !llvm.module.flags = !{
@@ -21,12 +21,10 @@
; CHECK: !llvm.dbg.cu = !{[[CU:.+]]}
-; CHECK-DAG: [[CU]] = distinct !DICompileUnit(language: DW_LANG_C99,{{.*}}, retainedTypes: [[TYPES:![0-9]+]], globals: [[GLOBALS:![0-9]+]]
-; CHECK-DAG: [[EMPTY:![0-9]+]] = !{}
+; CHECK-DAG: [[CU]] = distinct !DICompileUnit(language: DW_LANG_C99,{{.*}}, retainedTypes: [[TYPES:![0-9]+]], globals: [[EMPTY:![0-9]+]]
+; CHECK-DAG: [[EMPTY]] = !{}
; CHECK-DAG: [[TYPES]] = !{[[T0:![0-9]+]]
; CHECK-DAG: [[T0]] = !DIBasicType(name: "unsigned int",
-; CHECK-DAG: [[GLOBALS]] = !{{{![0-9]+}}
-
; CHECK-DAG: [[SUBPROG]] = distinct !DISubprogram(name: "test", {{.*}}retainedNodes: [[EMPTY]])
define void @test() !dbg !17 {
diff --git a/llvm/test/tools/split-file/output-is-special.test b/llvm/test/tools/split-file/output-is-special.test
index 98bb4d36a4ff..0b1e0f786c4d 100644
--- a/llvm/test/tools/split-file/output-is-special.test
+++ b/llvm/test/tools/split-file/output-is-special.test
@@ -1,5 +1,4 @@
# UNSUPPORTED: system-windows
-# REQUIRES: shell
## Don't delete the output if it is special, otherwise root may accidentally
## remove important special files.
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 40494da8b21b..c696934a959b 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1631,6 +1631,7 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1152, "gfx1152"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, "gfx9-generic"), \
diff --git a/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp b/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp
index f4d8496aba4a..38352d6342d4 100644
--- a/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp
+++ b/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp
@@ -65,12 +65,13 @@ void identifyUninterestingMDNodes(Oracle &O, MDNodeList &MDs) {
SmallVector<Metadata *, 16> TN;
for (size_t I = 0; I < Tup->getNumOperands(); ++I) {
// Ignore any operands that are not DebugInfo metadata nodes.
- if (isa_and_nonnull<DINode>(Tup->getOperand(I)))
- // Don't add uninteresting operands to the tuple.
- if (!O.shouldKeep())
- continue;
-
- TN.push_back(Tup->getOperand(I));
+ if (Metadata *Op = Tup->getOperand(I).get()) {
+ if (isa<DINode>(Op) || isa<DIGlobalVariableExpression>(Op))
+ // Don't add uninteresting operands to the tuple.
+ if (!O.shouldKeep())
+ continue;
+ TN.push_back(Op);
+ }
}
if (TN.size() != Tup->getNumOperands())
DbgNode->replaceOperandWith(OpIdx, DbgNode->get(DbgNode->getContext(), TN));
diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index 7148e2902fa7..ca50187e5e5e 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -63,6 +63,11 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
"riscv64"),
"e-m:e-p:64:64-i64:64-i128:128-n32:64-S128");
+ // Check that LoongArch64 upgrades -n64 to -n32:64.
+ EXPECT_EQ(UpgradeDataLayoutString("e-m:e-p:64:64-i64:64-i128:128-n64-S128",
+ "loongarch64"),
+ "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128");
+
// Check that SPIR && SPIRV targets add -G1 if it's not present.
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "spir"), "e-p:32:32-G1");
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "spir64"), "e-p:32:32-G1");
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index a913718d0fe0..e120bc3e3594 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -662,4 +662,111 @@ TEST(MemProf, MissingFrameId) {
ASSERT_TRUE(FrameIdConv.LastUnmappedId.has_value());
EXPECT_EQ(*FrameIdConv.LastUnmappedId, 3U);
}
+
+// Verify CallStackRadixTreeBuilder can handle empty inputs.
+TEST(MemProf, RadixTreeBuilderEmpty) {
+ llvm::DenseMap<FrameId, llvm::memprof::LinearFrameId> MemProfFrameIndexes;
+ llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> MemProfCallStackData;
+ llvm::memprof::CallStackRadixTreeBuilder Builder;
+ Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes);
+ ASSERT_THAT(Builder.getRadixArray(), testing::IsEmpty());
+ const auto &Mappings = Builder.getCallStackPos();
+ ASSERT_THAT(Mappings, testing::IsEmpty());
+}
+
+// Verify CallStackRadixTreeBuilder can handle one trivial call stack.
+TEST(MemProf, RadixTreeBuilderOne) {
+ llvm::DenseMap<FrameId, llvm::memprof::LinearFrameId> MemProfFrameIndexes = {
+ {11, 1}, {12, 2}, {13, 3}};
+ llvm::SmallVector<llvm::memprof::FrameId> CS1 = {13, 12, 11};
+ llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> MemProfCallStackData;
+ MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS1), CS1});
+ llvm::memprof::CallStackRadixTreeBuilder Builder;
+ Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes);
+ EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({
+ 3U, // Size of CS1,
+ 3U, // MemProfFrameIndexes[13]
+ 2U, // MemProfFrameIndexes[12]
+ 1U // MemProfFrameIndexes[11]
+ }));
+ const auto &Mappings = Builder.getCallStackPos();
+ ASSERT_THAT(Mappings, SizeIs(1));
+ EXPECT_THAT(Mappings, testing::Contains(testing::Pair(
+ llvm::memprof::hashCallStack(CS1), 0U)));
+}
+
+// Verify CallStackRadixTreeBuilder can form a link between two call stacks.
+TEST(MemProf, RadixTreeBuilderTwo) {
+ llvm::DenseMap<FrameId, llvm::memprof::LinearFrameId> MemProfFrameIndexes = {
+ {11, 1}, {12, 2}, {13, 3}};
+ llvm::SmallVector<llvm::memprof::FrameId> CS1 = {12, 11};
+ llvm::SmallVector<llvm::memprof::FrameId> CS2 = {13, 12, 11};
+ llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> MemProfCallStackData;
+ MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS1), CS1});
+ MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS2), CS2});
+ llvm::memprof::CallStackRadixTreeBuilder Builder;
+ Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes);
+ EXPECT_THAT(Builder.getRadixArray(),
+ testing::ElementsAreArray({
+ 2U, // Size of CS1
+ static_cast<uint32_t>(-3), // Jump 3 steps
+ 3U, // Size of CS2
+ 3U, // MemProfFrameIndexes[13]
+ 2U, // MemProfFrameIndexes[12]
+ 1U // MemProfFrameIndexes[11]
+ }));
+ const auto &Mappings = Builder.getCallStackPos();
+ ASSERT_THAT(Mappings, SizeIs(2));
+ EXPECT_THAT(Mappings, testing::Contains(testing::Pair(
+ llvm::memprof::hashCallStack(CS1), 0U)));
+ EXPECT_THAT(Mappings, testing::Contains(testing::Pair(
+ llvm::memprof::hashCallStack(CS2), 2U)));
+}
+
+// Verify CallStackRadixTreeBuilder can form a jump to a prefix that itself has
+// another jump to another prefix.
+TEST(MemProf, RadixTreeBuilderSuccessiveJumps) {
+ llvm::DenseMap<FrameId, llvm::memprof::LinearFrameId> MemProfFrameIndexes = {
+ {11, 1}, {12, 2}, {13, 3}, {14, 4}, {15, 5}, {16, 6}, {17, 7}, {18, 8},
+ };
+ llvm::SmallVector<llvm::memprof::FrameId> CS1 = {14, 13, 12, 11};
+ llvm::SmallVector<llvm::memprof::FrameId> CS2 = {15, 13, 12, 11};
+ llvm::SmallVector<llvm::memprof::FrameId> CS3 = {17, 16, 12, 11};
+ llvm::SmallVector<llvm::memprof::FrameId> CS4 = {18, 16, 12, 11};
+ llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> MemProfCallStackData;
+ MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS1), CS1});
+ MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS2), CS2});
+ MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS3), CS3});
+ MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS4), CS4});
+ llvm::memprof::CallStackRadixTreeBuilder Builder;
+ Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes);
+ EXPECT_THAT(Builder.getRadixArray(),
+ testing::ElementsAreArray({
+ 4U, // Size of CS1
+ 4U, // MemProfFrameIndexes[14]
+ static_cast<uint32_t>(-3), // Jump 3 steps
+ 4U, // Size of CS2
+ 5U, // MemProfFrameIndexes[15]
+ 3U, // MemProfFrameIndexes[13]
+ static_cast<uint32_t>(-7), // Jump 7 steps
+ 4U, // Size of CS3
+ 7U, // MemProfFrameIndexes[17]
+ static_cast<uint32_t>(-3), // Jump 3 steps
+ 4U, // Size of CS4
+ 8U, // MemProfFrameIndexes[18]
+ 6U, // MemProfFrameIndexes[16]
+ 2U, // MemProfFrameIndexes[12]
+ 1U // MemProfFrameIndexes[11]
+ }));
+ const auto &Mappings = Builder.getCallStackPos();
+ ASSERT_THAT(Mappings, SizeIs(4));
+ EXPECT_THAT(Mappings, testing::Contains(testing::Pair(
+ llvm::memprof::hashCallStack(CS1), 0U)));
+ EXPECT_THAT(Mappings, testing::Contains(testing::Pair(
+ llvm::memprof::hashCallStack(CS2), 3U)));
+ EXPECT_THAT(Mappings, testing::Contains(testing::Pair(
+ llvm::memprof::hashCallStack(CS3), 7U)));
+ EXPECT_THAT(Mappings, testing::Contains(testing::Pair(
+ llvm::memprof::hashCallStack(CS4), 10U)));
+}
} // namespace
diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp
index e9fd9671ea6a..9e9b4fbcdedd 100644
--- a/llvm/unittests/Support/VirtualFileSystemTest.cpp
+++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp
@@ -1138,6 +1138,11 @@ TEST_F(InMemoryFileSystemTest, DuplicatedFile) {
ASSERT_FALSE(FS.addFile("/a/b", 0, MemoryBuffer::getMemBuffer("a")));
ASSERT_TRUE(FS.addFile("/a", 0, MemoryBuffer::getMemBuffer("a")));
ASSERT_FALSE(FS.addFile("/a", 0, MemoryBuffer::getMemBuffer("b")));
+ ASSERT_TRUE(FS.addFile("/b/c/d", 0, MemoryBuffer::getMemBuffer("a")));
+ ASSERT_FALSE(FS.addFile("/b/c", 0, MemoryBuffer::getMemBuffer("a")));
+ ASSERT_TRUE(FS.addFile(
+ "/b/c", 0, MemoryBuffer::getMemBuffer(""), /*User=*/std::nullopt,
+ /*Group=*/std::nullopt, sys::fs::file_type::directory_file));
}
TEST_F(InMemoryFileSystemTest, DirectoryIteration) {
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index 6aa1d7a087eb..61921a99e171 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -125,6 +125,9 @@ TEST(getLinuxHostCPUName, AArch64) {
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0xc0\n"
"CPU part : 0xac5"),
"ampere1b");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x51\n"
+ "CPU part : 0x001"),
+ "oryon-1");
// MSM8992/4 weirdness
StringRef MSM8992ProcCpuInfo = R"(
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 797d7dfbca20..571031d07fcc 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1815,11 +1815,23 @@ INSTANTIATE_TEST_SUITE_P(
{AArch64::AEK_CRC, AArch64::AEK_AES, AArch64::AEK_SHA2,
AArch64::AEK_FP, AArch64::AEK_SIMD, AArch64::AEK_FP16,
AArch64::AEK_RAS, AArch64::AEK_LSE, AArch64::AEK_RDM}),
- "8.2-A")),
+ "8.2-A"),
+ ARMCPUTestParams<AArch64::ExtensionBitset>(
+ "oryon-1", "armv8.6-a", "crypto-neon-fp-armv8",
+ (AArch64::ExtensionBitset(
+ {AArch64::AEK_CRC, AArch64::AEK_FP, AArch64::AEK_PAUTH,
+ AArch64::AEK_FCMA, AArch64::AEK_JSCVT, AArch64::AEK_SIMD,
+ AArch64::AEK_RAS, AArch64::AEK_LSE, AArch64::AEK_RDM,
+ AArch64::AEK_RCPC, AArch64::AEK_DOTPROD, AArch64::AEK_SM4,
+ AArch64::AEK_SHA3, AArch64::AEK_BF16, AArch64::AEK_SHA2,
+ AArch64::AEK_AES, AArch64::AEK_I8MM, AArch64::AEK_RAND,
+ AArch64::AEK_PROFILE, AArch64::AEK_CRYPTO})),
+ "8.6-A")),
+
ARMCPUTestParams<AArch64::ExtensionBitset>::PrintToStringParamName);
// Note: number of CPUs includes aliases.
-static constexpr unsigned NumAArch64CPUArchs = 76;
+static constexpr unsigned NumAArch64CPUArchs = 77;
TEST(TargetParserTest, testAArch64CPUArchList) {
SmallVector<StringRef, NumAArch64CPUArchs> List;
diff --git a/llvm/utils/gn/secondary/bolt/lib/Core/BUILD.gn b/llvm/utils/gn/secondary/bolt/lib/Core/BUILD.gn
index 210dd1278509..e88df022be78 100644
--- a/llvm/utils/gn/secondary/bolt/lib/Core/BUILD.gn
+++ b/llvm/utils/gn/secondary/bolt/lib/Core/BUILD.gn
@@ -29,6 +29,7 @@ static_library("Core") {
"DynoStats.cpp",
"Exceptions.cpp",
"FunctionLayout.cpp",
+ "GDBIndex.cpp",
"HashUtilities.cpp",
"JumpTable.cpp",
"MCPlusBuilder.cpp",
diff --git a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn
index 0cf99256c9bd..d3a3ee755808 100644
--- a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn
@@ -110,6 +110,7 @@ static_library("AST") {
"Interp/InterpShared.cpp",
"Interp/InterpStack.cpp",
"Interp/InterpState.cpp",
+ "Interp/MemberPointer.cpp",
"Interp/Pointer.cpp",
"Interp/PrimType.cpp",
"Interp/Program.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
index 0d134c7bdffb..bcf2ea751056 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
@@ -33,6 +33,7 @@ static_library("IPO") {
"DeadArgumentElimination.cpp",
"ElimAvailExtern.cpp",
"EmbedBitcodePass.cpp",
+ "ExpandVariadics.cpp",
"ExtractGV.cpp",
"ForceFunctionAttrs.cpp",
"FunctionAttrs.cpp",
diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py
index 1d4babc99984..afb7f078072f 100644
--- a/llvm/utils/lit/lit/llvm/config.py
+++ b/llvm/utils/lit/lit/llvm/config.py
@@ -588,7 +588,10 @@ class LLVMConfig(object):
if getattr(self.config, pp, None)
]
- self.with_environment("LD_LIBRARY_PATH", lib_paths, append_path=True)
+ if platform.system() == "AIX":
+ self.with_environment("LIBPATH", lib_paths, append_path=True)
+ else:
+ self.with_environment("LD_LIBRARY_PATH", lib_paths, append_path=True)
shl = getattr(self.config, "llvm_shlib_dir", None)
pext = getattr(self.config, "llvm_plugin_ext", None)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
index 123ce36cb0a7..852490cf7428 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
@@ -33,36 +33,36 @@ class LLVMFuncOp;
/// external C function calls. The list of functions provided here must be
/// implemented separately (e.g. as part of a support runtime library or as part
/// of the libc).
-LLVM::LLVMFuncOp lookupOrCreatePrintI64Fn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintU64Fn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintF16Fn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintBF16Fn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintF32Fn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintF64Fn(ModuleOp moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintI64Fn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintU64Fn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintF16Fn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintBF16Fn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintF32Fn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintF64Fn(Operation *moduleOp);
/// Declares a function to print a C-string.
/// If a custom runtime function is defined via `runtimeFunctionName`, it must
/// have the signature void(char const*). The default function is `printString`.
LLVM::LLVMFuncOp
-lookupOrCreatePrintStringFn(ModuleOp moduleOp,
+lookupOrCreatePrintStringFn(Operation *moduleOp,
std::optional<StringRef> runtimeFunctionName = {});
-LLVM::LLVMFuncOp lookupOrCreatePrintOpenFn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintCloseFn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintCommaFn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreatePrintNewlineFn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreateMallocFn(ModuleOp moduleOp, Type indexType);
-LLVM::LLVMFuncOp lookupOrCreateAlignedAllocFn(ModuleOp moduleOp,
+LLVM::LLVMFuncOp lookupOrCreatePrintOpenFn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintCloseFn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintCommaFn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreatePrintNewlineFn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreateMallocFn(Operation *moduleOp, Type indexType);
+LLVM::LLVMFuncOp lookupOrCreateAlignedAllocFn(Operation *moduleOp,
Type indexType);
-LLVM::LLVMFuncOp lookupOrCreateFreeFn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreateGenericAllocFn(ModuleOp moduleOp,
+LLVM::LLVMFuncOp lookupOrCreateFreeFn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreateGenericAllocFn(Operation *moduleOp,
Type indexType);
-LLVM::LLVMFuncOp lookupOrCreateGenericAlignedAllocFn(ModuleOp moduleOp,
+LLVM::LLVMFuncOp lookupOrCreateGenericAlignedAllocFn(Operation *moduleOp,
Type indexType);
-LLVM::LLVMFuncOp lookupOrCreateGenericFreeFn(ModuleOp moduleOp);
-LLVM::LLVMFuncOp lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType,
+LLVM::LLVMFuncOp lookupOrCreateGenericFreeFn(Operation *moduleOp);
+LLVM::LLVMFuncOp lookupOrCreateMemRefCopyFn(Operation *moduleOp, Type indexType,
Type unrankedDescriptorType);
/// Create a FuncOp with signature `resultType`(`paramTypes`)` and name `name`.
-LLVM::LLVMFuncOp lookupOrCreateFn(ModuleOp moduleOp, StringRef name,
+LLVM::LLVMFuncOp lookupOrCreateFn(Operation *moduleOp, StringRef name,
ArrayRef<Type> paramTypes = {},
Type resultType = {}, bool isVarArg = false);
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
index fea5afa0b7bb..81bab1b0c82f 100644
--- a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
@@ -114,12 +114,14 @@ def ApplyReassociativeReshapeFoldingPatternsOp : Op<Transform_Dialect,
def ApplyRewriteTensorOpsAsConstantPatternsOp : Op<Transform_Dialect,
"apply_patterns.tensor.rewrite_as_constant",
[DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
+ let arguments = (ins UnitAttr:$aggressive);
let description = [{
Indicates that tensor ops (such as tensor.generate) should be replaced with
constants (arith.constant) when possible.
}];
- let assemblyFormat = "attr-dict";
+ let assemblyFormat =
+ "(`aggressive` $aggressive^)? attr-dict";
}
def Transform_TensorPadOp : Transform_ConcreteOpType<"tensor.pad">;
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
index 7dabc266c023..7f983b8b3cfd 100644
--- a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
@@ -91,9 +91,12 @@ void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns);
/// respectively.
void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns);
+using ControlFoldFn = std::function<bool(OpOperand *)>;
+
/// Populates `patterns` with patterns that replace tensor ops (such as
/// tensor.generate) with constants when possible.
-void populateRewriteAsConstantPatterns(RewritePatternSet &patterns);
+void populateRewriteAsConstantPatterns(RewritePatternSet &patterns,
+ const ControlFoldFn &controlFn);
//===----------------------------------------------------------------------===//
// Transform helpers
diff --git a/mlir/include/mlir/Target/LLVMIR/Export.h b/mlir/include/mlir/Target/LLVMIR/Export.h
index 224496865513..893aaaa4faff 100644
--- a/mlir/include/mlir/Target/LLVMIR/Export.h
+++ b/mlir/include/mlir/Target/LLVMIR/Export.h
@@ -20,10 +20,11 @@ class Module;
namespace mlir {
class Operation;
-/// Translate operation that satisfies LLVM dialect module requirements into an
-/// LLVM IR module living in the given context. This translates operations from
-/// any dilalect that has a registered implementation of
-/// LLVMTranslationDialectInterface.
+/// Translates a given LLVM dialect `module` into an LLVM IR module living in
+/// the given context. Operates on any operation from dialects that provide a
+/// registered implementation of the LLVMTranslationDialectInterface. Returns
+/// nullptr when the translation fails.
+/// Verifies the produced LLVM module, except when `disableVerification` is set.
std::unique_ptr<llvm::Module>
translateModuleToLLVMIR(Operation *module, llvm::LLVMContext &llvmContext,
llvm::StringRef name = "LLVMDialectModule",
diff --git a/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp b/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp
index b29abc94ce40..e48ca5180b70 100644
--- a/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp
+++ b/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp
@@ -10,18 +10,14 @@
#include "mlir/Analysis/DataLayoutAnalysis.h"
#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/SymbolTable.h"
using namespace mlir;
namespace {
-// TODO: Fix the LLVM utilities for looking up functions to take Operation*
-// with SymbolTable trait instead of ModuleOp and make similar change here. This
-// allows call sites to use getParentWithTrait<OpTrait::SymbolTable> instead
-// of getParentOfType<ModuleOp> to pass down the operation.
LLVM::LLVMFuncOp getNotalignedAllocFn(const LLVMTypeConverter *typeConverter,
- ModuleOp module, Type indexType) {
+ Operation *module, Type indexType) {
bool useGenericFn = typeConverter->getOptions().useGenericFunctions;
-
if (useGenericFn)
return LLVM::lookupOrCreateGenericAllocFn(module, indexType);
@@ -29,7 +25,7 @@ LLVM::LLVMFuncOp getNotalignedAllocFn(const LLVMTypeConverter *typeConverter,
}
LLVM::LLVMFuncOp getAlignedAllocFn(const LLVMTypeConverter *typeConverter,
- ModuleOp module, Type indexType) {
+ Operation *module, Type indexType) {
bool useGenericFn = typeConverter->getOptions().useGenericFunctions;
if (useGenericFn)
@@ -79,7 +75,8 @@ std::tuple<Value, Value> AllocationOpLLVMLowering::allocateBufferManuallyAlign(
// Allocate the underlying buffer.
Type elementPtrType = this->getElementPtrType(memRefType);
LLVM::LLVMFuncOp allocFuncOp = getNotalignedAllocFn(
- getTypeConverter(), op->getParentOfType<ModuleOp>(), getIndexType());
+ getTypeConverter(), op->getParentWithTrait<OpTrait::SymbolTable>(),
+ getIndexType());
auto results = rewriter.create<LLVM::CallOp>(loc, allocFuncOp, sizeBytes);
Value allocatedPtr =
@@ -144,7 +141,8 @@ Value AllocationOpLLVMLowering::allocateBufferAutoAlign(
Type elementPtrType = this->getElementPtrType(memRefType);
LLVM::LLVMFuncOp allocFuncOp = getAlignedAllocFn(
- getTypeConverter(), op->getParentOfType<ModuleOp>(), getIndexType());
+ getTypeConverter(), op->getParentWithTrait<OpTrait::SymbolTable>(),
+ getIndexType());
auto results = rewriter.create<LLVM::CallOp>(
loc, allocFuncOp, ValueRange({allocAlignment, sizeBytes}));
diff --git a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
index 0004c2e3403e..88421a16ccf9 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
@@ -45,49 +45,53 @@ static constexpr llvm::StringRef kGenericFree = "_mlir_memref_to_llvm_free";
static constexpr llvm::StringRef kMemRefCopy = "memrefCopy";
/// Generic print function lookupOrCreate helper.
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFn(ModuleOp moduleOp, StringRef name,
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFn(Operation *moduleOp,
+ StringRef name,
ArrayRef<Type> paramTypes,
Type resultType, bool isVarArg) {
- auto func = moduleOp.lookupSymbol<LLVM::LLVMFuncOp>(name);
+ assert(moduleOp->hasTrait<OpTrait::SymbolTable>() &&
+ "expected SymbolTable operation");
+ auto func = llvm::dyn_cast_or_null<LLVM::LLVMFuncOp>(
+ SymbolTable::lookupSymbolIn(moduleOp, name));
if (func)
return func;
- OpBuilder b(moduleOp.getBodyRegion());
+ OpBuilder b(moduleOp->getRegion(0));
return b.create<LLVM::LLVMFuncOp>(
moduleOp->getLoc(), name,
LLVM::LLVMFunctionType::get(resultType, paramTypes, isVarArg));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintI64Fn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintI64Fn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintI64,
IntegerType::get(moduleOp->getContext(), 64),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintU64Fn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintU64Fn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintU64,
IntegerType::get(moduleOp->getContext(), 64),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintF16Fn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintF16Fn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintF16,
IntegerType::get(moduleOp->getContext(), 16), // bits!
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintBF16Fn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintBF16Fn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintBF16,
IntegerType::get(moduleOp->getContext(), 16), // bits!
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintF32Fn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintF32Fn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintF32,
Float32Type::get(moduleOp->getContext()),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintF64Fn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintF64Fn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintF64,
Float64Type::get(moduleOp->getContext()),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
@@ -103,72 +107,72 @@ static LLVM::LLVMPointerType getVoidPtr(MLIRContext *context) {
}
LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintStringFn(
- ModuleOp moduleOp, std::optional<StringRef> runtimeFunctionName) {
+ Operation *moduleOp, std::optional<StringRef> runtimeFunctionName) {
return lookupOrCreateFn(moduleOp, runtimeFunctionName.value_or(kPrintString),
getCharPtr(moduleOp->getContext()),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintOpenFn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintOpenFn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintOpen, {},
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintCloseFn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintCloseFn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintClose, {},
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintCommaFn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintCommaFn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintComma, {},
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintNewlineFn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreatePrintNewlineFn(Operation *moduleOp) {
return lookupOrCreateFn(moduleOp, kPrintNewline, {},
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateMallocFn(ModuleOp moduleOp,
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateMallocFn(Operation *moduleOp,
Type indexType) {
return LLVM::lookupOrCreateFn(moduleOp, kMalloc, indexType,
getVoidPtr(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateAlignedAllocFn(ModuleOp moduleOp,
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateAlignedAllocFn(Operation *moduleOp,
Type indexType) {
return LLVM::lookupOrCreateFn(moduleOp, kAlignedAlloc, {indexType, indexType},
getVoidPtr(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFreeFn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFreeFn(Operation *moduleOp) {
return LLVM::lookupOrCreateFn(
moduleOp, kFree, getVoidPtr(moduleOp->getContext()),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateGenericAllocFn(ModuleOp moduleOp,
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateGenericAllocFn(Operation *moduleOp,
Type indexType) {
return LLVM::lookupOrCreateFn(moduleOp, kGenericAlloc, indexType,
getVoidPtr(moduleOp->getContext()));
}
LLVM::LLVMFuncOp
-mlir::LLVM::lookupOrCreateGenericAlignedAllocFn(ModuleOp moduleOp,
+mlir::LLVM::lookupOrCreateGenericAlignedAllocFn(Operation *moduleOp,
Type indexType) {
return LLVM::lookupOrCreateFn(moduleOp, kGenericAlignedAlloc,
{indexType, indexType},
getVoidPtr(moduleOp->getContext()));
}
-LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateGenericFreeFn(ModuleOp moduleOp) {
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateGenericFreeFn(Operation *moduleOp) {
return LLVM::lookupOrCreateFn(
moduleOp, kGenericFree, getVoidPtr(moduleOp->getContext()),
LLVM::LLVMVoidType::get(moduleOp->getContext()));
}
LLVM::LLVMFuncOp
-mlir::LLVM::lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType,
+mlir::LLVM::lookupOrCreateMemRefCopyFn(Operation *moduleOp, Type indexType,
Type unrankedDescriptorType) {
return LLVM::lookupOrCreateFn(
moduleOp, kMemRefCopy,
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
index 5c6a32ce9a68..33016f84056e 100644
--- a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
+++ b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
@@ -127,7 +127,20 @@ void transform::ApplyReassociativeReshapeFoldingPatternsOp::populatePatterns(
void transform::ApplyRewriteTensorOpsAsConstantPatternsOp::populatePatterns(
RewritePatternSet &patterns) {
- tensor::populateRewriteAsConstantPatterns(patterns);
+ ControlFoldFn defaultControlFn = [](OpOperand *fusedOperand) {
+ Operation *producer = fusedOperand->get().getDefiningOp();
+ return producer && producer->hasOneUse();
+ };
+
+ ControlFoldFn aggressiveControlFn = [](OpOperand *fusedOperand) {
+ return true;
+ };
+
+ // Add folding with reshape by expansion patterns.
+ if (getAggressive())
+ tensor::populateRewriteAsConstantPatterns(patterns, aggressiveControlFn);
+ else
+ tensor::populateRewriteAsConstantPatterns(patterns, defaultControlFn);
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp
index 5d6e3ec9756a..c681cadcb27c 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp
@@ -48,6 +48,34 @@ static LogicalResult isPackOn1D(RewriterBase &rewriter, Operation *op,
return success();
}
+// If the `linalgOp` represents a transpose, return the permutation vector for
+// the transpose. Otherwise, return failure.
+static FailureOr<SmallVector<int64_t>>
+getTransposeOpPermutation(linalg::LinalgOp linalgOp) {
+ if (auto transposeOp = dyn_cast<linalg::TransposeOp>(linalgOp.getOperation()))
+ return SmallVector<int64_t>(transposeOp.getPermutation());
+ if (linalgOp.getNumParallelLoops() != linalgOp.getNumLoops())
+ return failure();
+
+ if (linalgOp.getNumDpsInputs() != 1 || linalgOp.getNumDpsInits() != 1)
+ return failure();
+ auto mapRange = linalgOp.getIndexingMapsArray();
+ if (!mapRange.front().isPermutation() || !mapRange.back().isPermutation() ||
+ mapRange.front() == mapRange.back()) {
+ return failure();
+ }
+ if (!llvm::hasSingleElement(linalgOp.getBlock()->getOperations()))
+ return failure();
+ AffineMap outMap = mapRange.back();
+ AffineMap inMap = mapRange.front();
+ // To get the permutation, look at each output index and find which
+ // dimension in the input we're reading from for that index.
+ return llvm::map_to_vector(outMap.getResults(),
+ [&](AffineExpr expr) -> int64_t {
+ return *inMap.getResultPosition(expr);
+ });
+}
+
/// Packing one-dimensional tensor can be expressed as an expand shape op.
struct SimplifyPackToExpandShape : public OpRewritePattern<PackOp> {
using OpRewritePattern<PackOp>::OpRewritePattern;
@@ -246,14 +274,10 @@ static bool checkAndPermute(ArrayRef<int64_t> permutation,
for (unsigned int i = 0; i < rank; ++i) {
int64_t remappedPosition = permutation[i];
-
- if (!inVec.empty()) {
- if (remappedPosition >= rank) {
- return false;
- }
+ if (remappedPosition >= rank)
+ return false;
+ if (!inVec.empty())
remappedPosition = inVec[remappedPosition];
- }
-
resVec.push_back(remappedPosition);
}
@@ -263,20 +287,25 @@ static bool checkAndPermute(ArrayRef<int64_t> permutation,
/// Fold 'pack' -> 'transpose' into 'pack' since 'pack' already has transpose
/// semantics.
struct FoldProducerPackWithConsumerLinalgTransposeOp
- : public OpRewritePattern<linalg::TransposeOp> {
- using OpRewritePattern<linalg::TransposeOp>::OpRewritePattern;
+ : public OpInterfaceRewritePattern<linalg::LinalgOp> {
+ using OpInterfaceRewritePattern<linalg::LinalgOp>::OpInterfaceRewritePattern;
- LogicalResult matchAndRewrite(linalg::TransposeOp transposeOp,
+ LogicalResult matchAndRewrite(linalg::LinalgOp linalgOp,
PatternRewriter &rewriter) const override {
- auto packOp = transposeOp.getOperand(0).getDefiningOp<PackOp>();
+ auto packOp = linalgOp->getOperand(0).getDefiningOp<PackOp>();
if (!packOp)
return failure();
+ FailureOr<SmallVector<int64_t>> maybePerm =
+ getTransposeOpPermutation(linalgOp);
+ if (failed(maybePerm))
+ return failure();
+
auto innerDimsPos = packOp.getInnerDimsPos();
auto mixedInnerTiles = packOp.getMixedTiles();
auto outerDimsPerm = packOp.getOuterDimsPerm();
- auto transposePerm = transposeOp.getPermutation();
+ auto transposePerm = maybePerm.value();
SmallVector<int64_t> newOuterDimsPermVec;
SmallVector<int64_t> newInnerDimsPosVec;
SmallVector<OpFoldResult> newMixedInnerTilesVec;
@@ -285,7 +314,7 @@ struct FoldProducerPackWithConsumerLinalgTransposeOp
if (!checkAndPermute(transposePerm, outerDimsPerm, newOuterDimsPermVec,
srcRank))
return rewriter.notifyMatchFailure(
- transposeOp,
+ linalgOp,
"Cannot fold in tensor.pack if a tile dimension was transposed "
"with a non-tile dimension in linalg.transpose.");
@@ -297,11 +326,11 @@ struct FoldProducerPackWithConsumerLinalgTransposeOp
}
Value output = packOp.createDestinationTensor(
- rewriter, transposeOp.getLoc(), packOp.getSource(),
- newMixedInnerTilesVec, newInnerDimsPosVec, newOuterDimsPermVec);
+ rewriter, linalgOp.getLoc(), packOp.getSource(), newMixedInnerTilesVec,
+ newInnerDimsPosVec, newOuterDimsPermVec);
rewriter.replaceOpWithNewOp<PackOp>(
- transposeOp, packOp.getSource(), output, newInnerDimsPosVec,
+ linalgOp, packOp.getSource(), output, newInnerDimsPosVec,
newMixedInnerTilesVec, packOp.getPaddingValue(), newOuterDimsPermVec);
return success();
@@ -316,12 +345,16 @@ struct FoldConsumerPackWithProducerLinalgTransposeOp
LogicalResult matchAndRewrite(PackOp packOp,
PatternRewriter &rewriter) const override {
- auto transposeOp = packOp.getSource().getDefiningOp<linalg::TransposeOp>();
+ auto linalgOp = packOp.getSource().getDefiningOp<linalg::LinalgOp>();
+ if (!linalgOp)
+ return failure();
- if (!transposeOp)
+ FailureOr<SmallVector<int64_t>> maybePerm =
+ getTransposeOpPermutation(linalgOp);
+ if (failed(maybePerm))
return failure();
- auto transposePermutation = transposeOp.getPermutation();
+ auto transposePermutation = maybePerm.value();
auto outerDimsPerm = packOp.getOuterDimsPerm();
auto innerDimsPos = packOp.getInnerDimsPos();
SmallVector<int64_t> newInnerDimsPosVec;
@@ -337,11 +370,11 @@ struct FoldConsumerPackWithProducerLinalgTransposeOp
newInnerDimsPosVec.push_back(transposePermutation[dim]);
Value output = packOp.createDestinationTensor(
- rewriter, packOp.getLoc(), transposeOp.getOperand(0),
+ rewriter, packOp.getLoc(), linalgOp->getOperand(0),
packOp.getMixedTiles(), newInnerDimsPosVec, newOuterDimsPermVec);
rewriter.replaceOpWithNewOp<PackOp>(
- packOp, transposeOp.getOperand(0), output, newInnerDimsPosVec,
+ packOp, linalgOp->getOperand(0), output, newInnerDimsPosVec,
packOp.getMixedTiles(), packOp.getPaddingValue(), newOuterDimsPermVec);
return success();
@@ -351,34 +384,38 @@ struct FoldConsumerPackWithProducerLinalgTransposeOp
/// Fold 'unpack' -> 'transpose' into 'unpack' since 'unpack' already has
/// transpose semantics.
struct FoldProducerUnPackWithConsumerLinalgTransposeOp
- : public OpRewritePattern<linalg::TransposeOp> {
- using OpRewritePattern<linalg::TransposeOp>::OpRewritePattern;
+ : public OpInterfaceRewritePattern<linalg::LinalgOp> {
+ using OpInterfaceRewritePattern<linalg::LinalgOp>::OpInterfaceRewritePattern;
- LogicalResult matchAndRewrite(linalg::TransposeOp transposeOp,
+ LogicalResult matchAndRewrite(linalg::LinalgOp linalgOp,
PatternRewriter &rewriter) const override {
- auto unPackOp = transposeOp.getOperand(0).getDefiningOp<UnPackOp>();
+ auto unPackOp = linalgOp->getOperand(0).getDefiningOp<UnPackOp>();
if (!unPackOp)
return failure();
- auto transposePermutation = transposeOp.getPermutation();
+ FailureOr<SmallVector<int64_t>> maybePerm =
+ getTransposeOpPermutation(linalgOp);
+ if (failed(maybePerm))
+ return failure();
+
auto outerDimsPerm = unPackOp.getOuterDimsPerm();
auto innerDimsPos = unPackOp.getInnerDimsPos();
SmallVector<int64_t> newInnerDimsPosVec;
SmallVector<int64_t> newOuterDimsPermVec =
- llvm::to_vector(transposePermutation);
-
- if (!outerDimsPerm.empty())
- applyPermutationToVector(newOuterDimsPermVec, outerDimsPerm);
+ invertPermutationVector(maybePerm.value());
// Can't use applyPermutationToVector for newInnerDimsPosVec since input and
// permutation rank won't necessarily be equal in all cases.
for (auto dim : innerDimsPos)
- newInnerDimsPosVec.push_back(transposePermutation[dim]);
+ newInnerDimsPosVec.push_back(newOuterDimsPermVec[dim]);
+
+ if (!outerDimsPerm.empty())
+ applyPermutationToVector(newOuterDimsPermVec, outerDimsPerm);
// Reuse the destination of the transpose op.
rewriter.replaceOpWithNewOp<UnPackOp>(
- transposeOp, unPackOp.getSource(), transposeOp.getDpsInits()[0],
+ linalgOp, unPackOp.getSource(), linalgOp.getDpsInits()[0],
newInnerDimsPosVec, unPackOp.getMixedTiles(), newOuterDimsPermVec);
return success();
@@ -393,13 +430,17 @@ struct FoldConsumerUnPackWithProducerLinalgTransposeOp
LogicalResult matchAndRewrite(UnPackOp unPackOp,
PatternRewriter &rewriter) const override {
- auto transposeOp =
- unPackOp.getSource().getDefiningOp<linalg::TransposeOp>();
+ auto linalgOp = unPackOp.getSource().getDefiningOp<linalg::LinalgOp>();
+ if (!linalgOp)
+ return failure();
- if (!transposeOp)
+ FailureOr<SmallVector<int64_t>> maybePerm =
+ getTransposeOpPermutation(linalgOp);
+ if (failed(maybePerm))
return failure();
- auto transposePermutation = transposeOp.getPermutation();
+ SmallVector<int64_t> inverseTransposePerm =
+ invertPermutationVector(maybePerm.value());
auto outerDimsPerm = unPackOp.getOuterDimsPerm();
auto innerDimsPos = unPackOp.getInnerDimsPos();
int64_t destRank = unPackOp.getSourceRank() - innerDimsPos.size();
@@ -408,7 +449,7 @@ struct FoldConsumerUnPackWithProducerLinalgTransposeOp
SmallVector<int64_t> newInnerDimsPosVec;
SmallVector<OpFoldResult> newMixedInnerTilesVec;
- if (!checkAndPermute(transposePermutation, outerDimsPerm,
+ if (!checkAndPermute(inverseTransposePerm, outerDimsPerm,
newOuterDimsPermVec, destRank))
return rewriter.notifyMatchFailure(
unPackOp,
@@ -416,18 +457,18 @@ struct FoldConsumerUnPackWithProducerLinalgTransposeOp
"with a non-tile dimension in linalg.transpose.");
// Process transpose operation for tiled inner dimensions
- for (unsigned int i = destRank; i < transposePermutation.size(); ++i) {
- int64_t remappedPosition = transposePermutation[i] - destRank;
+ for (unsigned int i = destRank; i < inverseTransposePerm.size(); ++i) {
+ int64_t remappedPosition = inverseTransposePerm[i] - destRank;
newMixedInnerTilesVec.push_back(mixedInnerTilesVec[remappedPosition]);
newInnerDimsPosVec.push_back(innerDimsPos[remappedPosition]);
}
Value output = unPackOp.createDestinationTensor(
- rewriter, unPackOp.getLoc(), transposeOp.getOperand(0),
+ rewriter, unPackOp.getLoc(), linalgOp->getOperand(0),
newMixedInnerTilesVec, newInnerDimsPosVec, newOuterDimsPermVec);
rewriter.replaceOpWithNewOp<UnPackOp>(
- unPackOp, transposeOp.getOperand(0), output, newInnerDimsPosVec,
+ unPackOp, linalgOp->getOperand(0), output, newInnerDimsPosVec,
newMixedInnerTilesVec, newOuterDimsPermVec);
return success();
diff --git a/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp b/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp
index 11e1de543ac9..7c9fced540ad 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp
@@ -8,9 +8,12 @@
//
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/PatternMatch.h"
+#include "llvm/ADT/TypeSwitch.h"
+
using namespace mlir;
using namespace mlir::tensor;
@@ -45,9 +48,169 @@ struct GenerateToConstant : public OpRewritePattern<GenerateOp> {
}
};
+/// Transform a linear index from one indexing space to another given:
+///
+/// - the shape of the source indexing space,
+/// - the strides of the target indexing space,
+/// - a linear index into the source indexing space.
+///
+/// This function is logically a sequence of linearize/delinearize over
+/// different bases but avoids allocating intermediate SmallVectors.
+int64_t transformIndexSpace(ArrayRef<int64_t> inputShape,
+ ArrayRef<int64_t> outputStrides,
+ int64_t srcLinearIndex) {
+ assert(inputShape.size() == outputStrides.size());
+
+ int64_t dstLinearIndex = 0;
+
+ for (int64_t dim = inputShape.size() - 1; dim >= 0; --dim) {
+ // Compute the index into the current dimension of the source tensor.
+ // `quotient` is the remaining linear index after accounting for the
+ // current dimension.
+ //
+ // `remainder` is the index into the source tensor for the current
+ // dimension.
+ auto [quotient, remainder] = std::div(srcLinearIndex, inputShape[dim]);
+
+ srcLinearIndex = quotient;
+
+ // Add the contribution of the current dimension to the output using the
+ // permutation map.
+ dstLinearIndex += outputStrides[dim] * remainder;
+ }
+
+ return dstLinearIndex;
+}
+
+template <typename ElemType, typename AttrType>
+Value constantFoldPadOp(PatternRewriter &rewriter, Location loc,
+ DenseElementsAttr input, AttrType padValue,
+ ArrayRef<int64_t> padLow, ArrayRef<int64_t> padHigh) {
+ auto inputValues = input.tryGetValues<ElemType>();
+ if (failed(inputValues))
+ return nullptr;
+
+ auto oldShape = input.getType().getShape();
+
+ // Compute the output shape of the new value.
+ auto newShape =
+ llvm::map_to_vector(llvm::zip(oldShape, padLow, padHigh),
+ [](std::tuple<int64_t, int64_t, int64_t> pack) {
+ auto [old, low, high] = pack;
+ return old + low + high;
+ });
+
+ int64_t outputSize = computeProduct(newShape);
+
+ // Fully initialize the vector with the padding value.
+ // The non-padded area will then be copied.
+ SmallVector<ElemType> values(outputSize, padValue.getValue());
+
+ // Strides for input and output are used to transform between the indexing
+ // space of the input and output tensors.
+ SmallVector<int64_t> outputStrides = computeStrides(newShape);
+
+ // The contribution of the low padding to the offset in the output tensor.
+ // This is the starting position of the source tensor within the padding
+ // tensor.
+ int64_t startingOffset = linearize(padLow, outputStrides);
+
+ // Copy values from the input tensor to the corresponding sub-region
+ // of the output tensor.
+ for (auto [inputIndex, inputValue] : llvm::enumerate(*inputValues)) {
+ auto outputIndex = transformIndexSpace(oldShape, outputStrides, inputIndex);
+ values[outputIndex + startingOffset] = inputValue;
+ }
+
+ // Create an attribute for the folded value.
+ auto newType = input.getType().clone(newShape);
+ auto newAttr = DenseElementsAttr::get(newType, values);
+
+ Operation *constantOp =
+ rewriter.getContext()
+ ->getLoadedDialect<TensorDialect>()
+ ->materializeConstant(rewriter, newAttr, newType, loc);
+
+ return constantOp ? constantOp->getResult(0) : nullptr;
+}
+
+struct PadOpToConstant final : public OpRewritePattern<PadOp> {
+
+ PadOpToConstant(MLIRContext *context, const ControlFoldFn &controlFn,
+ PatternBenefit benefit = 1)
+ : OpRewritePattern<PadOp>(context, benefit), controlFn{controlFn} {}
+
+ LogicalResult matchAndRewrite(PadOp padTensorOp,
+ PatternRewriter &rewriter) const override {
+ if (padTensorOp.getNofold())
+ return rewriter.notifyMatchFailure(
+ padTensorOp, "refusing to fold nofold pad operation");
+
+ TypedValue<RankedTensorType> input = padTensorOp.getSource();
+ RankedTensorType resultType = padTensorOp.getResult().getType();
+
+ DenseElementsAttr inputAttr = nullptr;
+ if (!matchPattern(input, m_Constant(&inputAttr)))
+ return failure();
+
+ Value paddingValue = padTensorOp.getConstantPaddingValue();
+
+ // Extract the constant value used for padding or bail out.
+ Attribute paddingAttr = nullptr;
+ if (!paddingValue || !matchPattern(paddingValue, m_Constant(&paddingAttr)))
+ return rewriter.notifyMatchFailure(padTensorOp,
+ "unable to get constant value");
+
+ // Try to extract the constant values of the low and high padding.
+ auto lowPad = getConstantIntValues(padTensorOp.getMixedLowPad());
+ auto highPad = getConstantIntValues(padTensorOp.getMixedHighPad());
+
+ // If the padding cannot be extracted, bail out.
+ if (!lowPad || !highPad)
+ return rewriter.notifyMatchFailure(padTensorOp,
+ "unable to extract constant padding");
+
+ // We have a potential candidate, consult the control function to
+ // determine if the op should fold.
+ if (!controlFn(&padTensorOp.getSourceMutable()))
+ return rewriter.notifyMatchFailure(padTensorOp,
+ "not folding due to cost function");
+
+ Location loc = padTensorOp.getLoc();
+
+ // Try constant folding the supported cases of integer and float values.
+ Value newOp =
+ llvm::TypeSwitch<Attribute, Value>(paddingAttr)
+ .Case([&](FloatAttr floatAttr) {
+ return constantFoldPadOp<llvm::APFloat>(
+ rewriter, loc, inputAttr, floatAttr, *lowPad, *highPad);
+ })
+ .Case([&](IntegerAttr integerAttr) {
+ return constantFoldPadOp<llvm::APInt>(
+ rewriter, loc, inputAttr, integerAttr, *lowPad, *highPad);
+ })
+ .Default(Value());
+
+ if (!newOp)
+ return rewriter.notifyMatchFailure(padTensorOp,
+ "tensor type not supported");
+
+ if (newOp.getType() != resultType)
+ newOp = rewriter.create<tensor::CastOp>(loc, resultType, newOp);
+
+ rewriter.replaceOp(padTensorOp, newOp);
+ return success();
+ }
+
+private:
+ ControlFoldFn controlFn;
+};
+
} // namespace
void mlir::tensor::populateRewriteAsConstantPatterns(
- RewritePatternSet &patterns) {
+ RewritePatternSet &patterns, const ControlFoldFn &controlFn) {
patterns.add<GenerateToConstant>(patterns.getContext());
+
+ patterns.add<PadOpToConstant>(patterns.getContext(), controlFn);
}
diff --git a/mlir/lib/Dialect/Utils/IndexingUtils.cpp b/mlir/lib/Dialect/Utils/IndexingUtils.cpp
index 4c960659d80c..aba225be720c 100644
--- a/mlir/lib/Dialect/Utils/IndexingUtils.cpp
+++ b/mlir/lib/Dialect/Utils/IndexingUtils.cpp
@@ -92,7 +92,7 @@ int64_t mlir::computeProduct(ArrayRef<int64_t> basis) {
assert(llvm::all_of(basis, [](int64_t s) { return s > 0; }) &&
"basis must be nonnegative");
if (basis.empty())
- return 0;
+ return 1;
return std::accumulate(basis.begin(), basis.end(), 1,
std::multiplies<int64_t>());
}
diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index 29e36210f127..6a362afc52f2 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -2061,7 +2061,8 @@ void AsmPrinter::Impl::printLocationInternal(LocationAttr loc, bool pretty,
/// Print a floating point value in a way that the parser will be able to
/// round-trip losslessly.
-static void printFloatValue(const APFloat &apValue, raw_ostream &os) {
+static void printFloatValue(const APFloat &apValue, raw_ostream &os,
+ bool *printedHex = nullptr) {
// We would like to output the FP constant value in exponential notation,
// but we cannot do this if doing so will lose precision. Check here to
// make sure that we only output it in exponential format if we can parse
@@ -2102,6 +2103,8 @@ static void printFloatValue(const APFloat &apValue, raw_ostream &os) {
// Print special values in hexadecimal format. The sign bit should be included
// in the literal.
+ if (printedHex)
+ *printedHex = true;
SmallVector<char, 16> str;
APInt apInt = apValue.bitcastToAPInt();
apInt.toString(str, /*Radix=*/16, /*Signed=*/false,
@@ -2275,10 +2278,12 @@ void AsmPrinter::Impl::printAttributeImpl(Attribute attr,
return;
} else if (auto floatAttr = llvm::dyn_cast<FloatAttr>(attr)) {
- printFloatValue(floatAttr.getValue(), os);
+ bool printedHex = false;
+ printFloatValue(floatAttr.getValue(), os, &printedHex);
// FloatAttr elides the type if F64.
- if (typeElision == AttrTypeElision::May && floatAttr.getType().isF64())
+ if (typeElision == AttrTypeElision::May && floatAttr.getType().isF64() &&
+ !printedHex)
return;
} else if (auto strAttr = llvm::dyn_cast<StringAttr>(attr)) {
diff --git a/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir b/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir
index 9a3143f5e550..629a4c213572 100644
--- a/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir
+++ b/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir
@@ -636,3 +636,142 @@ func.func @tensor_padded_unpack_linalg_transpose_fold(%arg0: tensor<71x7x4x16x16
// CHECK-SAME: into %[[OUT:.+]] : tensor<71x7x4x16x16xf32> -> tensor<100x71x64xf32>
// CHECK: return %[[UNPACK]] : tensor<100x71x64xf32>
// CHECK: }
+
+// -----
+
+func.func @non_involution_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> {
+ %0 = tensor.empty() : tensor<5x2x3x16x4xi32>
+ %transposed = linalg.transpose ins(%arg0 : tensor<2x3x5x4x16xi32>)
+ outs(%0 : tensor<5x2x3x16x4xi32>)
+ permutation = [2, 0, 1, 4, 3]
+ %1 = tensor.empty() : tensor<5x48x8xi32>
+ %unpack = tensor.unpack %transposed
+ outer_dims_perm = [0, 2, 1]
+ inner_dims_pos = [1, 2]
+ inner_tiles = [16, 4] into
+ %1 : tensor<5x2x3x16x4xi32> -> tensor<5x48x8xi32>
+ return %unpack : tensor<5x48x8xi32>
+}
+//CHECK-LABEL: func.func @non_involution_transpose_unpack_fold(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> {
+// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<5x48x8xi32>
+// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK-SAME: outer_dims_perm = [2, 1, 0]
+// CHECK-SAME: inner_dims_pos = [2, 1]
+// CHECK-SAME: inner_tiles = [4, 16]
+// CHECK-SAME:   into %[[OUT]] : tensor<2x3x5x4x16xi32> -> tensor<5x48x8xi32>
+// CHECK: return %[[UNPACK]] : tensor<5x48x8xi32>
+// CHECK: }
+
+// -----
+
+func.func @unpack_non_involution_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> {
+ %0 = tensor.empty() : tensor<3x56x3648xf32>
+ %unpack = tensor.unpack %arg0
+ outer_dims_perm = [2, 0, 1]
+ inner_dims_pos = [1, 2]
+ inner_tiles = [1, 64]
+ into %0 : tensor<57x3x56x1x64xf32> -> tensor<3x56x3648xf32>
+
+ %1 = tensor.empty() : tensor<3648x3x56xf32>
+ %transposed = linalg.transpose
+ ins(%unpack : tensor<3x56x3648xf32>)
+ outs(%1 : tensor<3648x3x56xf32>)
+ permutation = [2, 0, 1]
+ return %transposed : tensor<3648x3x56xf32>
+}
+// CHECK-LABEL: func.func @unpack_non_involution_transpose_fold(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> {
+// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<3648x3x56xf32>
+// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK-SAME: outer_dims_perm = [0, 1, 2]
+// CHECK-SAME: inner_dims_pos = [2, 0]
+// CHECK-SAME: inner_tiles = [1, 64]
+// CHECK-SAME: into %[[OUT:.+]] : tensor<57x3x56x1x64xf32> -> tensor<3648x3x56xf32>
+// CHECK: return %[[UNPACK]] : tensor<3648x3x56xf32>
+// CHECK: }
+
+// -----
+
+func.func @transpose_unpacked_dims_no_fold(%arg0: tensor<2x16x5x4x3xi32>) -> tensor<5x32x12xi32> {
+ %0 = tensor.empty() : tensor<5x2x3x16x4xi32>
+ %transposed = linalg.transpose ins(%arg0 : tensor<2x16x5x4x3xi32>)
+ outs(%0 : tensor<5x2x3x16x4xi32>)
+ permutation = [2, 0, 4, 1, 3]
+ %1 = tensor.empty() : tensor<5x32x12xi32>
+ %unpack = tensor.unpack %transposed
+ inner_dims_pos = [1, 2]
+ inner_tiles = [16, 4] into
+ %1 : tensor<5x2x3x16x4xi32> -> tensor<5x32x12xi32>
+ return %unpack : tensor<5x32x12xi32>
+}
+//CHECK-LABEL: func.func @transpose_unpacked_dims_no_fold(
+// CHECK: linalg.transpose
+// CHECK: tensor.unpack
+
+// -----
+
+#map = affine_map<(d0, d1, d2, d3, d4)->(d1, d2, d0, d4, d3)>
+#map1 = affine_map<(d0, d1, d2, d3, d4)->(d0, d1, d2, d3, d4)>
+func.func @generic_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> {
+ %0 = tensor.empty() : tensor<5x2x3x16x4xi32>
+ %transposed = linalg.generic {
+ iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
+ indexing_maps = [#map, #map1]}
+ ins(%arg0 : tensor<2x3x5x4x16xi32>)
+ outs(%0 : tensor<5x2x3x16x4xi32>) {
+ ^bb0(%in : i32, %out : i32):
+ linalg.yield %in : i32
+ } -> tensor<5x2x3x16x4xi32>
+ %1 = tensor.empty() : tensor<5x48x8xi32>
+ %unpack = tensor.unpack %transposed
+ outer_dims_perm = [0, 2, 1]
+ inner_dims_pos = [1, 2]
+ inner_tiles = [16, 4] into
+ %1 : tensor<5x2x3x16x4xi32> -> tensor<5x48x8xi32>
+ return %unpack : tensor<5x48x8xi32>
+}
+//CHECK-LABEL: func.func @generic_transpose_unpack_fold(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> {
+// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<5x48x8xi32>
+// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK-SAME: outer_dims_perm = [2, 1, 0]
+// CHECK-SAME: inner_dims_pos = [2, 1]
+// CHECK-SAME: inner_tiles = [4, 16]
+// CHECK-SAME:   into %[[OUT]] : tensor<2x3x5x4x16xi32> -> tensor<5x48x8xi32>
+// CHECK: return %[[UNPACK]] : tensor<5x48x8xi32>
+// CHECK: }
+
+// -----
+
+#map = affine_map<(d0, d1, d2)->(d1, d2, d0)>
+#map1 = affine_map<(d0, d1, d2)->(d0, d1, d2)>
+func.func @unpack_generic_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> {
+ %0 = tensor.empty() : tensor<3x56x3648xf32>
+ %unpack = tensor.unpack %arg0
+ outer_dims_perm = [2, 0, 1]
+ inner_dims_pos = [1, 2]
+ inner_tiles = [1, 64]
+ into %0 : tensor<57x3x56x1x64xf32> -> tensor<3x56x3648xf32>
+
+ %1 = tensor.empty() : tensor<3648x3x56xf32>
+ %transposed = linalg.generic {
+ iterator_types = ["parallel", "parallel", "parallel"],
+ indexing_maps = [#map, #map1]}
+ ins(%unpack : tensor<3x56x3648xf32>)
+ outs(%1 : tensor<3648x3x56xf32>) {
+ ^bb0(%in : f32, %out : f32):
+ linalg.yield %in : f32
+ } -> tensor<3648x3x56xf32>
+ return %transposed : tensor<3648x3x56xf32>
+}
+// CHECK-LABEL: func.func @unpack_generic_transpose_fold(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> {
+// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<3648x3x56xf32>
+// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK-SAME: outer_dims_perm = [0, 1, 2]
+// CHECK-SAME: inner_dims_pos = [2, 0]
+// CHECK-SAME: inner_tiles = [1, 64]
+// CHECK-SAME: into %[[OUT:.+]] : tensor<57x3x56x1x64xf32> -> tensor<3648x3x56xf32>
+// CHECK: return %[[UNPACK]] : tensor<3648x3x56xf32>
+// CHECK: }
diff --git a/mlir/test/Dialect/Tensor/rewrite-as-constant.mlir b/mlir/test/Dialect/Tensor/rewrite-as-constant.mlir
index 1a1cf9e407d8..35ee6f1caf0d 100644
--- a/mlir/test/Dialect/Tensor/rewrite-as-constant.mlir
+++ b/mlir/test/Dialect/Tensor/rewrite-as-constant.mlir
@@ -21,3 +21,138 @@ func.func @tensor_generate_constant() -> tensor<2x3x5xf32> {
} : tensor<2x3x5xf32>
return %0 : tensor<2x3x5xf32>
}
+
+// CHECK-LABEL: func @pad_of_ints(
+// CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}: [0, 0, 0, 0],
+// CHECK-SAME{LITERAL}: [0, 6, 7, 0],
+// CHECK-SAME{LITERAL}: [0, 8, 9, 0],
+// CHECK-SAME{LITERAL}: [0, 0, 0, 0]
+// CHECK-SAME{LITERAL}: ]> : tensor<4x4xi32>
+// CHECK: %[[cast:.*]] = tensor.cast %[[cst]] : tensor<4x4xi32> to tensor<?x?xi32>
+// CHECK: return %[[cast]]
+func.func @pad_of_ints() -> tensor<?x?xi32> {
+ %init = arith.constant dense<[[6, 7], [8, 9]]> : tensor<2x2xi32>
+ %pad_value = arith.constant 0 : i32
+
+ %c1 = arith.constant 1 : index
+
+ %0 = tensor.pad %init low[%c1, %c1] high[%c1, %c1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : i32
+ } : tensor<2x2xi32> to tensor<?x?xi32>
+
+ return %0 : tensor<?x?xi32>
+}
+
+// CHECK-LABEL: func @pad_of_floats(
+// CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}: [0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00],
+// CHECK-SAME{LITERAL}: [0.000000e+00, 6.000000e+00, 7.000000e+00, 0.000000e+00],
+// CHECK-SAME{LITERAL}: [0.000000e+00, 8.000000e+00, 9.000000e+00, 0.000000e+00],
+// CHECK-SAME{LITERAL}: [0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00]
+// CHECK-SAME{LITERAL}: ]> : tensor<4x4xf32>
+// CHECK: return %[[cst]]
+
+func.func @pad_of_floats() -> tensor<4x4xf32> {
+ %init = arith.constant dense<[[6.0, 7.0], [8.0, 9.0]]> : tensor<2x2xf32>
+ %pad_value = arith.constant 0.0 : f32
+
+ %0 = tensor.pad %init low[1, 1] high[1, 1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : f32
+ } : tensor<2x2xf32> to tensor<4x4xf32>
+
+ return %0 : tensor<4x4xf32>
+}
+
+// CHECK-LABEL: func @pad_of_ints_no_low_dims(
+// CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}: [6, 7, 0],
+// CHECK-SAME{LITERAL}: [8, 9, 0],
+// CHECK-SAME{LITERAL}: [0, 0, 0]
+// CHECK-SAME{LITERAL}: ]> : tensor<3x3xi32>
+// CHECK: return %[[cst]]
+func.func @pad_of_ints_no_low_dims() -> tensor<3x3xi32> {
+ %init = arith.constant dense<[[6, 7], [8, 9]]> : tensor<2x2xi32>
+ %pad_value = arith.constant 0 : i32
+
+ %0 = tensor.pad %init low[0, 0] high[1, 1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : i32
+ } : tensor<2x2xi32> to tensor<3x3xi32>
+
+ return %0 : tensor<3x3xi32>
+}
+
+// CHECK-LABEL: func @pad_of_ints_no_high_dims(
+// CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}: [0, 0, 0],
+// CHECK-SAME{LITERAL}: [0, 6, 7],
+// CHECK-SAME{LITERAL}: [0, 8, 9]
+// CHECK-SAME{LITERAL}: ]> : tensor<3x3xi32>
+// CHECK: return %[[cst]]
+func.func @pad_of_ints_no_high_dims() -> tensor<3x3xi32> {
+ %init = arith.constant dense<[[6, 7], [8, 9]]> : tensor<2x2xi32>
+ %pad_value = arith.constant 0 : i32
+
+ %0 = tensor.pad %init low[1, 1] high[0, 0] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : i32
+ } : tensor<2x2xi32> to tensor<3x3xi32>
+
+ return %0 : tensor<3x3xi32>
+}
+
+// CHECK-LABEL: func @pad_multi_use_do_not_fold(
+// CHECK: %[[pad:.+]] = tensor.pad
+// CHECK: return %[[pad]]
+func.func @pad_multi_use_do_not_fold() -> (tensor<?x?xi32>, tensor<2x2xi32>) {
+ %init = arith.constant dense<[[6, 7], [8, 9]]> : tensor<2x2xi32>
+ %pad_value = arith.constant 0 : i32
+
+ %c1 = arith.constant 1 : index
+
+ %0 = tensor.pad %init low[%c1, %c1] high[%c1, %c1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : i32
+ } : tensor<2x2xi32> to tensor<?x?xi32>
+
+ return %0, %init : tensor<?x?xi32>, tensor<2x2xi32>
+}
+
+// -----
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+ %func_op = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.op<"func.func">
+ transform.apply_patterns to %func_op {
+ transform.apply_patterns.tensor.rewrite_as_constant aggressive
+ } : !transform.op<"func.func">
+ transform.yield
+ }
+}
+
+// CHECK-LABEL: func @pad_aggressive_fold(
+// CHECK: %[[init:.*]] = arith.constant dense<7> : tensor<2x2xi32>
+// CHECK: %[[cst:.*]] = arith.constant dense<[
+// CHECK-SAME{LITERAL}: [0, 0, 0, 0],
+// CHECK-SAME{LITERAL}: [0, 7, 7, 0],
+// CHECK-SAME{LITERAL}: [0, 7, 7, 0],
+// CHECK-SAME{LITERAL}: [0, 0, 0, 0]
+// CHECK-SAME{LITERAL}: ]> : tensor<4x4xi32>
+// CHECK: %[[cast:.*]] = tensor.cast %[[cst]] : tensor<4x4xi32> to tensor<?x?xi32>
+// CHECK: return %[[cast]]
+func.func @pad_aggressive_fold() -> (tensor<?x?xi32>, tensor<2x2xi32>) {
+ %init = arith.constant dense<7> : tensor<2x2xi32>
+ %pad_value = arith.constant 0 : i32
+
+ %c1 = arith.constant 1 : index
+
+ %0 = tensor.pad %init low[%c1, %c1] high[%c1, %c1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : i32
+ } : tensor<2x2xi32> to tensor<?x?xi32>
+
+ return %0, %init : tensor<?x?xi32>, tensor<2x2xi32>
+}
diff --git a/mlir/test/IR/array-of-attr.mlir b/mlir/test/IR/array-of-attr.mlir
index 1b6fe5520595..c2a607596582 100644
--- a/mlir/test/IR/array-of-attr.mlir
+++ b/mlir/test/IR/array-of-attr.mlir
@@ -12,3 +12,7 @@ test.array_of_attr_op
// CHECK: test.array_of_attr_op
// CHECK-SAME: a = [], b = [], c = []
test.array_of_attr_op a = [], b = [], c = []
+
+// CHECK: "test.test_array_float"
+// CHECK-SAME: 1.000000e+00 : f32, 1.000000e+00, 0x7FF0000000000000 : f64
+"test.test_array_float"() {test.float_arr = [1.0 : f32, 1.0 : f64, 0x7FF0000000000000 : f64]} : () -> ()
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index b35600367276..d88430a52b8b 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -46,7 +46,7 @@ set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx1010"
"gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
"gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
- "gfx1151")
+ "gfx1151;gfx1152")
set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
"sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90")
set(all_gpu_architectures
diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h
index 1d6804da75d9..fce2adca49c1 100644
--- a/offload/include/PluginManager.h
+++ b/offload/include/PluginManager.h
@@ -64,10 +64,6 @@ struct PluginManager {
std::make_unique<DeviceImageTy>(TgtBinDesc, TgtDeviceImage));
}
- /// Initialize as many devices as possible for this plugin. Devices that fail
- /// to initialize are ignored.
- void initDevices(GenericPluginTy &RTL);
-
/// Return the device presented to the user as device \p DeviceNo if it is
/// initialized and ready. Otherwise return an error explaining the problem.
llvm::Expected<DeviceTy &> getDevice(uint32_t DeviceNo);
@@ -117,20 +113,31 @@ struct PluginManager {
return Devices.getExclusiveAccessor();
}
- int getNumUsedPlugins() const { return DeviceOffsets.size(); }
-
// Initialize all plugins.
void initAllPlugins();
/// Iterator range for all plugins (in use or not, but always valid).
auto plugins() { return llvm::make_pointee_range(Plugins); }
+ /// Iterator range for all plugins (in use or not, but always valid).
+ auto plugins() const { return llvm::make_pointee_range(Plugins); }
+
/// Return the user provided requirements.
int64_t getRequirements() const { return Requirements.getRequirements(); }
/// Add \p Flags to the user provided requirements.
void addRequirements(int64_t Flags) { Requirements.addRequirements(Flags); }
+ /// Returns the number of plugins that are active.
+ int getNumActivePlugins() const {
+ int count = 0;
+ for (auto &R : plugins())
+ if (R.is_initialized())
+ ++count;
+
+ return count;
+ }
+
private:
bool RTLsLoaded = false;
llvm::SmallVector<__tgt_bin_desc *> DelayedBinDesc;
@@ -138,11 +145,9 @@ private:
// List of all plugins, in use or not.
llvm::SmallVector<std::unique_ptr<GenericPluginTy>> Plugins;
- // Mapping of plugins to offsets in the device table.
- llvm::DenseMap<const GenericPluginTy *, int32_t> DeviceOffsets;
-
- // Mapping of plugins to the number of used devices.
- llvm::DenseMap<const GenericPluginTy *, int32_t> DeviceUsed;
+ // Mapping of plugins to the OpenMP device identifier.
+ llvm::DenseMap<std::pair<const GenericPluginTy *, int32_t>, int32_t>
+ DeviceIds;
// Set of all device images currently in use.
llvm::DenseSet<const __tgt_device_image *> UsedImages;
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index c6dd954746e4..663cfdc5fdf0 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3163,25 +3163,24 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; }
/// Check whether the image is compatible with an AMDGPU device.
- Expected<bool> isELFCompatible(StringRef Image) const override {
+ Expected<bool> isELFCompatible(uint32_t DeviceId,
+ StringRef Image) const override {
// Get the associated architecture and flags from the ELF.
auto ElfOrErr = ELF64LEObjectFile::create(
MemoryBufferRef(Image, /*Identifier=*/""), /*InitContent=*/false);
if (!ElfOrErr)
return ElfOrErr.takeError();
std::optional<StringRef> Processor = ElfOrErr->tryGetCPUName();
+ if (!Processor)
+ return false;
- for (hsa_agent_t Agent : KernelAgents) {
- auto TargeTripleAndFeaturesOrError =
- utils::getTargetTripleAndFeatures(Agent);
- if (!TargeTripleAndFeaturesOrError)
- return TargeTripleAndFeaturesOrError.takeError();
- if (!utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
+ auto TargeTripleAndFeaturesOrError =
+ utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
+ if (!TargeTripleAndFeaturesOrError)
+ return TargeTripleAndFeaturesOrError.takeError();
+ return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
ElfOrErr->getPlatformFlags(),
- *TargeTripleAndFeaturesOrError))
- return false;
- }
- return true;
+ *TargeTripleAndFeaturesOrError);
}
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
@@ -3273,19 +3272,13 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (ArgsSize < KernelArgsSize)
return Plugin::error("Mismatch of kernel arguments size");
- // The args size reported by HSA may or may not contain the implicit args.
- // For now, assume that HSA does not consider the implicit arguments when
- // reporting the arguments of a kernel. In the worst case, we can waste
- // 56 bytes per allocation.
- uint32_t AllArgsSize = KernelArgsSize + ImplicitArgsSize;
-
AMDGPUPluginTy &AMDGPUPlugin =
static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin);
AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice();
AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager();
void *AllArgs = nullptr;
- if (auto Err = ArgsMemoryManager.allocate(AllArgsSize, &AllArgs))
+ if (auto Err = ArgsMemoryManager.allocate(ArgsSize, &AllArgs))
return Err;
// Account for user requested dynamic shared memory.
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index eda6a4fd541e..88423be039af 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -993,11 +993,11 @@ struct GenericPluginTy {
/// Get the number of active devices.
int32_t getNumDevices() const { return NumDevices; }
- /// Get the plugin-specific device identifier offset.
- int32_t getDeviceIdStartIndex() const { return DeviceIdStartIndex; }
-
- /// Set the plugin-specific device identifier offset.
- void setDeviceIdStartIndex(int32_t Offset) { DeviceIdStartIndex = Offset; }
+ /// Get the plugin-specific device identifier.
+ int32_t getUserId(int32_t DeviceId) const {
+ assert(UserDeviceIds.contains(DeviceId) && "No user-id registered");
+ return UserDeviceIds.at(DeviceId);
+ }
/// Get the ELF code to recognize the binary image of this plugin.
virtual uint16_t getMagicElfBits() const = 0;
@@ -1059,7 +1059,8 @@ struct GenericPluginTy {
/// Indicate if an image is compatible with the plugin devices. Notice that
/// this function may be called before actually initializing the devices. So
/// we could not move this function into GenericDeviceTy.
- virtual Expected<bool> isELFCompatible(StringRef Image) const = 0;
+ virtual Expected<bool> isELFCompatible(uint32_t DeviceID,
+ StringRef Image) const = 0;
protected:
/// Indicate whether a device id is valid.
@@ -1070,11 +1071,18 @@ protected:
public:
// TODO: This plugin interface needs to be cleaned up.
- /// Returns true if the plugin has been initialized.
+ /// Returns non-zero if the plugin runtime has been initialized.
int32_t is_initialized() const;
- /// Returns non-zero if the provided \p Image can be executed by the runtime.
- int32_t is_valid_binary(__tgt_device_image *Image, bool Initialized = true);
+ /// Returns non-zero if the \p Image is compatible with the plugin. This
+ /// function does not require the plugin to be initialized before use.
+ int32_t is_plugin_compatible(__tgt_device_image *Image);
+
+ /// Returns non-zero if the \p Image is compatible with the device.
+ int32_t is_device_compatible(int32_t DeviceId, __tgt_device_image *Image);
+
+ /// Returns non-zero if the plugin device has been initialized.
+ int32_t is_device_initialized(int32_t DeviceId) const;
/// Initialize the device inside of the plugin.
int32_t init_device(int32_t DeviceId);
@@ -1180,7 +1188,7 @@ public:
const char **ErrStr);
/// Sets the offset into the devices for use by OMPT.
- int32_t set_device_offset(int32_t DeviceIdOffset);
+ int32_t set_device_identifier(int32_t UserId, int32_t DeviceId);
/// Returns if the plugin can support auotmatic copy.
int32_t use_auto_zero_copy(int32_t DeviceId);
@@ -1200,10 +1208,8 @@ private:
/// Number of devices available for the plugin.
int32_t NumDevices = 0;
- /// Index offset, which when added to a DeviceId, will yield a unique
- /// user-observable device identifier. This is especially important when
- /// DeviceIds of multiple plugins / RTLs need to be distinguishable.
- int32_t DeviceIdStartIndex = 0;
+ /// Map of plugin device identifiers to the user device identifier.
+ llvm::DenseMap<int32_t, int32_t> UserDeviceIds;
/// Array of pointers to the devices. Initially, they are all set to nullptr.
/// Once a device is initialized, the pointer is stored in the position given
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 913721a15d71..5a53c479e33d 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -748,8 +748,7 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
if (ompt::Initialized) {
bool ExpectedStatus = false;
if (OmptInitialized.compare_exchange_strong(ExpectedStatus, true))
- performOmptCallback(device_initialize, /*device_num=*/DeviceId +
- Plugin.getDeviceIdStartIndex(),
+ performOmptCallback(device_initialize, Plugin.getUserId(DeviceId),
/*type=*/getComputeUnitKind().c_str(),
/*device=*/reinterpret_cast<ompt_device_t *>(this),
/*lookup=*/ompt::lookupCallbackByName,
@@ -847,9 +846,7 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
if (ompt::Initialized) {
bool ExpectedStatus = true;
if (OmptInitialized.compare_exchange_strong(ExpectedStatus, false))
- performOmptCallback(device_finalize,
- /*device_num=*/DeviceId +
- Plugin.getDeviceIdStartIndex());
+ performOmptCallback(device_finalize, Plugin.getUserId(DeviceId));
}
#endif
@@ -908,7 +905,7 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
size_t Bytes =
getPtrDiff(InputTgtImage->ImageEnd, InputTgtImage->ImageStart);
performOmptCallback(
- device_load, /*device_num=*/DeviceId + Plugin.getDeviceIdStartIndex(),
+ device_load, Plugin.getUserId(DeviceId),
/*FileName=*/nullptr, /*FileOffset=*/0, /*VmaInFile=*/nullptr,
/*ImgSize=*/Bytes, /*HostAddr=*/InputTgtImage->ImageStart,
/*DeviceAddr=*/nullptr, /* FIXME: ModuleId */ 0);
@@ -1492,11 +1489,14 @@ Error GenericDeviceTy::syncEvent(void *EventPtr) {
bool GenericDeviceTy::useAutoZeroCopy() { return useAutoZeroCopyImpl(); }
Error GenericPluginTy::init() {
+ if (Initialized)
+ return Plugin::success();
+
auto NumDevicesOrErr = initImpl();
if (!NumDevicesOrErr)
return NumDevicesOrErr.takeError();
-
Initialized = true;
+
NumDevices = *NumDevicesOrErr;
if (NumDevices == 0)
return Plugin::success();
@@ -1517,6 +1517,8 @@ Error GenericPluginTy::init() {
}
Error GenericPluginTy::deinit() {
+ assert(Initialized && "Plugin was not initialized!");
+
// Deinitialize all active devices.
for (int32_t DeviceId = 0; DeviceId < NumDevices; ++DeviceId) {
if (Devices[DeviceId]) {
@@ -1537,7 +1539,11 @@ Error GenericPluginTy::deinit() {
delete RecordReplay;
// Perform last deinitializations on the plugin.
- return deinitImpl();
+ if (Error Err = deinitImpl())
+ return Err;
+ Initialized = false;
+
+ return Plugin::success();
}
Error GenericPluginTy::initDevice(int32_t DeviceId) {
@@ -1599,8 +1605,7 @@ Expected<bool> GenericPluginTy::checkBitcodeImage(StringRef Image) const {
int32_t GenericPluginTy::is_initialized() const { return Initialized; }
-int32_t GenericPluginTy::is_valid_binary(__tgt_device_image *Image,
- bool Initialized) {
+int32_t GenericPluginTy::is_plugin_compatible(__tgt_device_image *Image) {
StringRef Buffer(reinterpret_cast<const char *>(Image->ImageStart),
target::getPtrDiff(Image->ImageEnd, Image->ImageStart));
@@ -1618,11 +1623,43 @@ int32_t GenericPluginTy::is_valid_binary(__tgt_device_image *Image,
auto MatchOrErr = checkELFImage(Buffer);
if (Error Err = MatchOrErr.takeError())
return HandleError(std::move(Err));
- if (!Initialized || !*MatchOrErr)
- return *MatchOrErr;
+ return *MatchOrErr;
+ }
+ case file_magic::bitcode: {
+ auto MatchOrErr = checkBitcodeImage(Buffer);
+ if (Error Err = MatchOrErr.takeError())
+ return HandleError(std::move(Err));
+ return *MatchOrErr;
+ }
+ default:
+ return false;
+ }
+}
+
+int32_t GenericPluginTy::is_device_compatible(int32_t DeviceId,
+ __tgt_device_image *Image) {
+ StringRef Buffer(reinterpret_cast<const char *>(Image->ImageStart),
+ target::getPtrDiff(Image->ImageEnd, Image->ImageStart));
+
+ auto HandleError = [&](Error Err) -> bool {
+ [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
+ DP("Failure to check validity of image %p: %s", Image, ErrStr.c_str());
+ return false;
+ };
+ switch (identify_magic(Buffer)) {
+ case file_magic::elf:
+ case file_magic::elf_relocatable:
+ case file_magic::elf_executable:
+ case file_magic::elf_shared_object:
+ case file_magic::elf_core: {
+ auto MatchOrErr = checkELFImage(Buffer);
+ if (Error Err = MatchOrErr.takeError())
+ return HandleError(std::move(Err));
+ if (!*MatchOrErr)
+ return false;
// Perform plugin-dependent checks for the specific architecture if needed.
- auto CompatibleOrErr = isELFCompatible(Buffer);
+ auto CompatibleOrErr = isELFCompatible(DeviceId, Buffer);
if (Error Err = CompatibleOrErr.takeError())
return HandleError(std::move(Err));
return *CompatibleOrErr;
@@ -1638,6 +1675,10 @@ int32_t GenericPluginTy::is_valid_binary(__tgt_device_image *Image,
}
}
+int32_t GenericPluginTy::is_device_initialized(int32_t DeviceId) const {
+ return isValidDeviceId(DeviceId) && Devices[DeviceId] != nullptr;
+}
+
int32_t GenericPluginTy::init_device(int32_t DeviceId) {
auto Err = initDevice(DeviceId);
if (Err) {
@@ -1985,8 +2026,9 @@ int32_t GenericPluginTy::init_device_info(int32_t DeviceId,
return OFFLOAD_SUCCESS;
}
-int32_t GenericPluginTy::set_device_offset(int32_t DeviceIdOffset) {
- setDeviceIdStartIndex(DeviceIdOffset);
+int32_t GenericPluginTy::set_device_identifier(int32_t UserId,
+ int32_t DeviceId) {
+ UserDeviceIds[DeviceId] = UserId;
return OFFLOAD_SUCCESS;
}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index b260334baa18..62460c07415b 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -1388,8 +1388,9 @@ struct CUDAPluginTy final : public GenericPluginTy {
const char *getName() const override { return GETNAME(TARGET_NAME); }
- /// Check whether the image is compatible with the available CUDA devices.
- Expected<bool> isELFCompatible(StringRef Image) const override {
+ /// Check whether the image is compatible with a CUDA device.
+ Expected<bool> isELFCompatible(uint32_t DeviceId,
+ StringRef Image) const override {
auto ElfOrErr =
ELF64LEObjectFile::create(MemoryBufferRef(Image, /*Identifier=*/""),
/*InitContent=*/false);
@@ -1399,33 +1400,29 @@ struct CUDAPluginTy final : public GenericPluginTy {
// Get the numeric value for the image's `sm_` value.
auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM;
- for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
- CUdevice Device;
- CUresult Res = cuDeviceGet(&Device, DevId);
- if (auto Err = Plugin::check(Res, "Error in cuDeviceGet: %s"))
- return std::move(Err);
-
- int32_t Major, Minor;
- Res = cuDeviceGetAttribute(
- &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, Device);
- if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s"))
- return std::move(Err);
-
- Res = cuDeviceGetAttribute(
- &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, Device);
- if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s"))
- return std::move(Err);
-
- int32_t ImageMajor = SM / 10;
- int32_t ImageMinor = SM % 10;
-
- // A cubin generated for a certain compute capability is supported to
- // run on any GPU with the same major revision and same or higher minor
- // revision.
- if (Major != ImageMajor || Minor < ImageMinor)
- return false;
- }
- return true;
+ CUdevice Device;
+ CUresult Res = cuDeviceGet(&Device, DeviceId);
+ if (auto Err = Plugin::check(Res, "Error in cuDeviceGet: %s"))
+ return std::move(Err);
+
+ int32_t Major, Minor;
+ Res = cuDeviceGetAttribute(
+ &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, Device);
+ if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s"))
+ return std::move(Err);
+
+ Res = cuDeviceGetAttribute(
+ &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, Device);
+ if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s"))
+ return std::move(Err);
+
+ int32_t ImageMajor = SM / 10;
+ int32_t ImageMinor = SM % 10;
+
+ // A cubin generated for a certain compute capability is supported to
+ // run on any GPU with the same major revision and same or higher minor
+ // revision.
+ return Major == ImageMajor && Minor >= ImageMinor;
}
};
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index ef84cbaf5458..aa59ea618e39 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -418,7 +418,9 @@ struct GenELF64PluginTy final : public GenericPluginTy {
}
/// All images (ELF-compatible) should be compatible with this plugin.
- Expected<bool> isELFCompatible(StringRef) const override { return true; }
+ Expected<bool> isELFCompatible(uint32_t, StringRef) const override {
+ return true;
+ }
Triple::ArchType getTripleArch() const override {
#if defined(__x86_64__)
diff --git a/offload/src/PluginManager.cpp b/offload/src/PluginManager.cpp
index 13f08b142b87..5e8f91792a55 100644
--- a/offload/src/PluginManager.cpp
+++ b/offload/src/PluginManager.cpp
@@ -47,6 +47,9 @@ void PluginManager::deinit() {
DP("Unloading RTLs...\n");
for (auto &Plugin : Plugins) {
+ if (!Plugin->is_initialized())
+ continue;
+
if (auto Err = Plugin->deinit()) {
[[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
DP("Failed to deinit plugin: %s\n", InfoMsg.c_str());
@@ -57,90 +60,15 @@ void PluginManager::deinit() {
DP("RTLs unloaded!\n");
}
-void PluginManager::initDevices(GenericPluginTy &RTL) {
- // If this RTL has already been initialized.
- if (PM->DeviceOffsets.contains(&RTL))
- return;
- TIMESCOPE();
-
- // If this RTL is not already in use, initialize it.
- assert(RTL.number_of_devices() > 0 && "Tried to initialize useless plugin!");
-
- // Initialize the device information for the RTL we are about to use.
- auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
-
- // Initialize the index of this RTL and save it in the used RTLs.
- int32_t DeviceOffset = ExclusiveDevicesAccessor->size();
-
- // Set the device identifier offset in the plugin.
- RTL.set_device_offset(DeviceOffset);
-
- int32_t NumberOfUserDevices = 0;
- int32_t NumPD = RTL.number_of_devices();
- ExclusiveDevicesAccessor->reserve(DeviceOffset + NumPD);
- // Auto zero-copy is a per-device property. We need to ensure
- // that all devices are suggesting to use it.
- bool UseAutoZeroCopy = !(NumPD == 0);
- for (int32_t PDevI = 0, UserDevId = DeviceOffset; PDevI < NumPD; PDevI++) {
- auto Device = std::make_unique<DeviceTy>(&RTL, UserDevId, PDevI);
- if (auto Err = Device->init()) {
- DP("Skip plugin known device %d: %s\n", PDevI,
- toString(std::move(Err)).c_str());
- continue;
- }
- UseAutoZeroCopy = UseAutoZeroCopy && Device->useAutoZeroCopy();
-
- ExclusiveDevicesAccessor->push_back(std::move(Device));
- ++NumberOfUserDevices;
- ++UserDevId;
- }
-
- // Auto Zero-Copy can only be currently triggered when the system is an
- // homogeneous APU architecture without attached discrete GPUs.
- // If all devices suggest to use it, change requirment flags to trigger
- // zero-copy behavior when mapping memory.
- if (UseAutoZeroCopy)
- addRequirements(OMPX_REQ_AUTO_ZERO_COPY);
-
- DeviceOffsets[&RTL] = DeviceOffset;
- DeviceUsed[&RTL] = NumberOfUserDevices;
- DP("Plugin has index %d, exposes %d out of %d devices!\n", DeviceOffset,
- NumberOfUserDevices, RTL.number_of_devices());
-}
-
void PluginManager::initAllPlugins() {
- for (auto &R : Plugins)
- initDevices(*R);
-}
-
-static void registerImageIntoTranslationTable(TranslationTable &TT,
- int32_t DeviceOffset,
- int32_t NumberOfUserDevices,
- __tgt_device_image *Image) {
-
- // same size, as when we increase one, we also increase the other.
- assert(TT.TargetsTable.size() == TT.TargetsImages.size() &&
- "We should have as many images as we have tables!");
-
- // Resize the Targets Table and Images to accommodate the new targets if
- // required
- unsigned TargetsTableMinimumSize = DeviceOffset + NumberOfUserDevices;
-
- if (TT.TargetsTable.size() < TargetsTableMinimumSize) {
- TT.DeviceTables.resize(TargetsTableMinimumSize, {});
- TT.TargetsImages.resize(TargetsTableMinimumSize, 0);
- TT.TargetsEntries.resize(TargetsTableMinimumSize, {});
- TT.TargetsTable.resize(TargetsTableMinimumSize, 0);
- }
-
- // Register the image in all devices for this target type.
- for (int32_t I = 0; I < NumberOfUserDevices; ++I) {
- // If we are changing the image we are also invalidating the target table.
- if (TT.TargetsImages[DeviceOffset + I] != Image) {
- TT.TargetsImages[DeviceOffset + I] = Image;
- TT.TargetsTable[DeviceOffset + I] =
- 0; // lazy initialization of target table.
+ for (auto &R : plugins()) {
+ if (auto Err = R.init()) {
+ [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
+ DP("Failed to init plugin: %s\n", InfoMsg.c_str());
+ continue;
}
+ DP("Registered plugin %s with %d visible device(s)\n", R.getName(),
+ R.number_of_devices());
}
}
@@ -153,27 +81,6 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
if (Entry.flags == OMP_REGISTER_REQUIRES)
PM->addRequirements(Entry.data);
- // Initialize all the plugins that have associated images.
- for (auto &Plugin : Plugins) {
- // Extract the exectuable image and extra information if availible.
- for (int32_t i = 0; i < Desc->NumDeviceImages; ++i) {
- if (Plugin->is_initialized())
- continue;
-
- if (!Plugin->is_valid_binary(&Desc->DeviceImages[i],
- /*Initialized=*/false))
- continue;
-
- if (auto Err = Plugin->init()) {
- [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
- DP("Failed to init plugin: %s\n", InfoMsg.c_str());
- } else {
- DP("Registered plugin %s with %d visible device(s)\n",
- Plugin->getName(), Plugin->number_of_devices());
- }
- }
- }
-
// Extract the exectuable image and extra information if availible.
for (int32_t i = 0; i < Desc->NumDeviceImages; ++i)
PM->addDeviceImage(*Desc, Desc->DeviceImages[i]);
@@ -188,54 +95,110 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
// Scan the RTLs that have associated images until we find one that supports
// the current image.
for (auto &R : PM->plugins()) {
- if (!R.number_of_devices())
+ if (!R.is_plugin_compatible(Img))
continue;
- if (!R.is_valid_binary(Img, /*Initialized=*/true)) {
- DP("Image " DPxMOD " is NOT compatible with RTL %s!\n",
- DPxPTR(Img->ImageStart), R.getName());
- continue;
+ if (!R.is_initialized()) {
+ if (auto Err = R.init()) {
+ [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
+ DP("Failed to init plugin: %s\n", InfoMsg.c_str());
+ continue;
+ }
+ DP("Registered plugin %s with %d visible device(s)\n", R.getName(),
+ R.number_of_devices());
}
- DP("Image " DPxMOD " is compatible with RTL %s!\n",
- DPxPTR(Img->ImageStart), R.getName());
-
- PM->initDevices(R);
+ if (!R.number_of_devices()) {
+ DP("Skipping plugin %s with no visible devices\n", R.getName());
+ continue;
+ }
- // Initialize (if necessary) translation table for this library.
- PM->TrlTblMtx.lock();
- if (!PM->HostEntriesBeginToTransTable.count(Desc->HostEntriesBegin)) {
- PM->HostEntriesBeginRegistrationOrder.push_back(Desc->HostEntriesBegin);
- TranslationTable &TransTable =
+ for (int32_t DeviceId = 0; DeviceId < R.number_of_devices(); ++DeviceId) {
+ if (!R.is_device_compatible(DeviceId, Img))
+ continue;
+
+ DP("Image " DPxMOD " is compatible with RTL %s device %d!\n",
+ DPxPTR(Img->ImageStart), R.getName(), DeviceId);
+
+ if (!R.is_device_initialized(DeviceId)) {
+ // Initialize the device information for the RTL we are about to use.
+ auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
+
+ int32_t UserId = ExclusiveDevicesAccessor->size();
+
+ // Set the device identifier offset in the plugin.
+#ifdef OMPT_SUPPORT
+ R.set_device_identifier(UserId, DeviceId);
+#endif
+
+ auto Device = std::make_unique<DeviceTy>(&R, UserId, DeviceId);
+ if (auto Err = Device->init()) {
+ [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
+ DP("Failed to init device %d: %s\n", DeviceId, InfoMsg.c_str());
+ continue;
+ }
+
+ ExclusiveDevicesAccessor->push_back(std::move(Device));
+
+ // We need to map between the plugin's device identifier and the one
+ // that OpenMP will use.
+ PM->DeviceIds[std::make_pair(&R, DeviceId)] = UserId;
+ }
+
+ // Initialize (if necessary) translation table for this library.
+ PM->TrlTblMtx.lock();
+ if (!PM->HostEntriesBeginToTransTable.count(Desc->HostEntriesBegin)) {
+ PM->HostEntriesBeginRegistrationOrder.push_back(
+ Desc->HostEntriesBegin);
+ TranslationTable &TT =
+ (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin];
+ TT.HostTable.EntriesBegin = Desc->HostEntriesBegin;
+ TT.HostTable.EntriesEnd = Desc->HostEntriesEnd;
+ }
+
+ // Retrieve translation table for this library.
+ TranslationTable &TT =
(PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin];
- TransTable.HostTable.EntriesBegin = Desc->HostEntriesBegin;
- TransTable.HostTable.EntriesEnd = Desc->HostEntriesEnd;
- }
- // Retrieve translation table for this library.
- TranslationTable &TransTable =
- (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin];
+ DP("Registering image " DPxMOD " with RTL %s!\n",
+ DPxPTR(Img->ImageStart), R.getName());
- DP("Registering image " DPxMOD " with RTL %s!\n", DPxPTR(Img->ImageStart),
- R.getName());
+ auto UserId = PM->DeviceIds[std::make_pair(&R, DeviceId)];
+ if (TT.TargetsTable.size() < static_cast<size_t>(UserId + 1)) {
+ TT.DeviceTables.resize(UserId + 1, {});
+ TT.TargetsImages.resize(UserId + 1, nullptr);
+ TT.TargetsEntries.resize(UserId + 1, {});
+ TT.TargetsTable.resize(UserId + 1, nullptr);
+ }
- registerImageIntoTranslationTable(TransTable, PM->DeviceOffsets[&R],
- PM->DeviceUsed[&R], Img);
- PM->UsedImages.insert(Img);
+ // Register the image for this target type and invalidate the table.
+ TT.TargetsImages[UserId] = Img;
+ TT.TargetsTable[UserId] = nullptr;
- PM->TrlTblMtx.unlock();
- FoundRTL = &R;
+ PM->UsedImages.insert(Img);
+ FoundRTL = &R;
- // if an RTL was found we are done - proceed to register the next image
- break;
+ PM->TrlTblMtx.unlock();
+ }
}
-
- if (!FoundRTL) {
+ if (!FoundRTL)
DP("No RTL found for image " DPxMOD "!\n", DPxPTR(Img->ImageStart));
- }
}
PM->RTLsMtx.unlock();
+ bool UseAutoZeroCopy = Plugins.size() > 0;
+
+ auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
+ for (const auto &Device : *ExclusiveDevicesAccessor)
+ UseAutoZeroCopy &= Device->useAutoZeroCopy();
+
+ // Auto Zero-Copy can only be currently triggered when the system is an
+ // homogeneous APU architecture without attached discrete GPUs.
+ // If all devices suggest to use it, change requirement flags to trigger
+ // zero-copy behavior when mapping memory.
+ if (UseAutoZeroCopy)
+ addRequirements(OMPX_REQ_AUTO_ZERO_COPY);
+
DP("Done registering entries!\n");
}
@@ -257,7 +220,7 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {
// Scan the RTLs that have associated images until we find one that supports
// the current image. We only need to scan RTLs that are already being used.
for (auto &R : PM->plugins()) {
- if (!DeviceOffsets.contains(&R))
+ if (!R.is_initialized())
continue;
// Ensure that we do not use any unused images associated with this RTL.
diff --git a/offload/src/omptarget.cpp b/offload/src/omptarget.cpp
index 91e1213f175e..9bca8529c5ee 100644
--- a/offload/src/omptarget.cpp
+++ b/offload/src/omptarget.cpp
@@ -315,7 +315,7 @@ void handleTargetOutcome(bool Success, ident_t *Loc) {
FAILURE_MESSAGE("Consult https://openmp.llvm.org/design/Runtimes.html "
"for debugging options.\n");
- if (!PM->getNumUsedPlugins()) {
+ if (!PM->getNumActivePlugins()) {
FAILURE_MESSAGE(
"No images found compatible with the installed hardware. ");
diff --git a/offload/test/offloading/ompx_bare_shfl_down_sync.cpp b/offload/test/offloading/ompx_bare_shfl_down_sync.cpp
index d2569a5b0266..c9246894b089 100644
--- a/offload/test/offloading/ompx_bare_shfl_down_sync.cpp
+++ b/offload/test/offloading/ompx_bare_shfl_down_sync.cpp
@@ -23,7 +23,7 @@ bool equal(T LHS, T RHS) {
template <typename T,
std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
bool equal(T LHS, T RHS) {
- return std::abs(LHS - RHS) < std::numeric_limits<T>::epsilon();
+ return __builtin_fabs(LHS - RHS) < std::numeric_limits<T>::epsilon();
}
template <typename T> void test() {
diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt
index c228a392e4c7..e565354ccc7f 100644
--- a/openmp/CMakeLists.txt
+++ b/openmp/CMakeLists.txt
@@ -137,8 +137,10 @@ if (OPENMP_ENABLE_OMPT_TOOLS)
endif()
# Propagate OMPT support to offload
-set(LIBOMP_HAVE_OMPT_SUPPORT ${LIBOMP_HAVE_OMPT_SUPPORT} PARENT_SCOPE)
-set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${LIBOMP_OMP_TOOLS_INCLUDE_DIR} PARENT_SCOPE)
+if(NOT ${OPENMP_STANDALONE_BUILD})
+ set(LIBOMP_HAVE_OMPT_SUPPORT ${LIBOMP_HAVE_OMPT_SUPPORT} PARENT_SCOPE)
+ set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${LIBOMP_OMP_TOOLS_INCLUDE_DIR} PARENT_SCOPE)
+endif()
option(OPENMP_MSVC_NAME_SCHEME "Build dll with MSVC naming scheme." OFF)
diff --git a/third-party/unittest/googletest/include/gtest/internal/gtest-port.h b/third-party/unittest/googletest/include/gtest/internal/gtest-port.h
index a17349e40150..02e1eb0a914f 100644
--- a/third-party/unittest/googletest/include/gtest/internal/gtest-port.h
+++ b/third-party/unittest/googletest/include/gtest/internal/gtest-port.h
@@ -652,7 +652,7 @@ typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
// Determines whether to support death tests.
// pops up a dialog window that cannot be suppressed programmatically.
#if (defined(GTEST_OS_LINUX) || defined(GTEST_OS_CYGWIN) || \
- defined(GTEST_OS_SOLARIS) || \
+ defined(GTEST_OS_SOLARIS) || defined(GTEST_OS_ZOS) || \
(defined(GTEST_OS_MAC) && !defined(GTEST_OS_IOS)) || \
(defined(GTEST_OS_WINDOWS_DESKTOP) && _MSC_VER) || \
defined(GTEST_OS_WINDOWS_MINGW) || defined(GTEST_OS_AIX) || \
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel
index 3980ef60c197..c8001fe1e581 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel
@@ -99,6 +99,7 @@ libc_test(
name = "fixedvector_test",
srcs = ["fixedvector_test.cpp"],
deps = [
+ "//libc:__support_cpp_array",
"//libc:__support_fixedvector",
],
)
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel
index 4f72a0a8e186..fac692addb9e 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel
@@ -529,7 +529,9 @@ libc_support_library(
"//libc:__support_cpp_bit",
"//libc:__support_cpp_type_traits",
"//libc:__support_fputil_basic_operations",
+ "//libc:__support_fputil_fenv_impl",
"//libc:__support_fputil_fp_bits",
+ "//libc:hdr_fenv_macros",
"//libc:hdr_math_macros",
"//libc/test/UnitTest:LibcUnitTest",
"//libc/test/UnitTest:fp_test_helpers",
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel
index eccea8faeebc..2ad2209925ce 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel
@@ -136,7 +136,9 @@ libc_support_library(
"//libc:__support_cpp_bit",
"//libc:__support_cpp_type_traits",
"//libc:__support_fputil_basic_operations",
+ "//libc:__support_fputil_fenv_impl",
"//libc:__support_fputil_fp_bits",
+ "//libc:hdr_fenv_macros",
"//libc:hdr_math_macros",
"//libc/test/UnitTest:LibcUnitTest",
"//libc/test/UnitTest:fp_test_helpers",
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index 2ffbc783ec97..aebb05d827f7 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -164,6 +164,10 @@ td_library(
includes = ["include"],
)
+llvm_config_target_defines = [
+ "LLVM_HAS_{}_TARGET=1".format(t) for t in llvm_targets
+]
+
cc_library(
name = "config",
hdrs = [
@@ -171,7 +175,7 @@ cc_library(
"include/llvm/Config/llvm-config.h",
],
copts = llvm_copts,
- defines = llvm_config_defines,
+ defines = llvm_config_defines + llvm_config_target_defines,
includes = ["include"],
textual_hdrs = [
"include/llvm/Config/AsmParsers.def",
@@ -1755,6 +1759,7 @@ cc_library(
":TransformUtils",
":Vectorize",
":config",
+ ":ir_headers",
],
)