[RISCV] Add cost model tests for llvm.vector.{insert,extract}. NFC

For llvm.vector.extract, this tests combinations of inserting at a zero and non-zero index, and extracting from a fixed or scalable vector. For llvm.vector.insert, this tests the same combinations as extracts but with an additional configuration for an undef vector. This is because we can use a subregister insert if the index is 0 and the vector is undef, which should be free.
author: Luke Lau <luke@igalia.com> 2024-02-15 11:48:38 +0800
committer: Luke Lau <luke@igalia.com> 2024-02-15 12:17:27 +0800
commit: fc0b67e1d79d1f199687f8f06d619984d9520230 (patch)
tree: 49265d916ec694df7f43ad2423868a5397b1f7df
parent: fa9e297b8b63dacb962d99814e698658ad71f946 (diff)
2 files changed, 504 insertions, 0 deletions
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-vectorextract.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-vectorextract.ll
new file mode 100644
index 000000000000..1e2d1f4d9495
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-vectorextract.ll
@@ -0,0 +1,169 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v -cost-kind=code-size < %s | FileCheck %s --check-prefix=SIZE
+
+define void @vector_extract_nxv128i8_0(<vscale x 128 x i8> %v) {
+; CHECK-LABEL: 'vector_extract_nxv128i8_0'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call <vscale x 1 x i8> @llvm.vector.extract.nxv1i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call <vscale x 32 x i8> @llvm.vector.extract.nxv32i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call <vscale x 64 x i8> @llvm.vector.extract.nxv64i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vector_extract_nxv128i8_0'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call <vscale x 1 x i8> @llvm.vector.extract.nxv1i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call <vscale x 32 x i8> @llvm.vector.extract.nxv32i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call <vscale x 64 x i8> @llvm.vector.extract.nxv64i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %scalable_mf8 = call <vscale x 1 x i8> @llvm.vector.extract.nxv1i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+  %scalable_mf4 = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+  %scalable_mf2 = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+  %scalable_m1 = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+  %scalable_m2 = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+  %scalable_m4 = call <vscale x 32 x i8> @llvm.vector.extract.nxv32i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+  %scalable_m8 = call <vscale x 64 x i8> @llvm.vector.extract.nxv64i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+
+  %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+  %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+  %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+  %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+  %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+  %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+  %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.nxv128i8(<vscale x 128 x i8> %v, i64 0)
+  ret void
+}
+
+define void @vector_extract_nxv128i8_1(<vscale x 128 x i8> %v) {
+; CHECK-LABEL: 'vector_extract_nxv128i8_1'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call <vscale x 1 x i8> @llvm.vector.extract.nxv1i8.nxv128i8(<vscale x 128 x i8> %v, i64 1)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv128i8(<vscale x 128 x i8> %v, i64 2)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv128i8(<vscale x 128 x i8> %v, i64 4)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv128i8(<vscale x 128 x i8> %v, i64 8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv128i8(<vscale x 128 x i8> %v, i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call <vscale x 32 x i8> @llvm.vector.extract.nxv32i8.nxv128i8(<vscale x 128 x i8> %v, i64 32)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call <vscale x 64 x i8> @llvm.vector.extract.nxv64i8.nxv128i8(<vscale x 128 x i8> %v, i64 64)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.nxv128i8(<vscale x 128 x i8> %v, i64 2)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.nxv128i8(<vscale x 128 x i8> %v, i64 4)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.nxv128i8(<vscale x 128 x i8> %v, i64 8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.nxv128i8(<vscale x 128 x i8> %v, i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.nxv128i8(<vscale x 128 x i8> %v, i64 32)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.nxv128i8(<vscale x 128 x i8> %v, i64 64)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.nxv128i8(<vscale x 128 x i8> %v, i64 128)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vector_extract_nxv128i8_1'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call <vscale x 1 x i8> @llvm.vector.extract.nxv1i8.nxv128i8(<vscale x 128 x i8> %v, i64 1)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv128i8(<vscale x 128 x i8> %v, i64 2)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv128i8(<vscale x 128 x i8> %v, i64 4)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv128i8(<vscale x 128 x i8> %v, i64 8)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv128i8(<vscale x 128 x i8> %v, i64 16)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call <vscale x 32 x i8> @llvm.vector.extract.nxv32i8.nxv128i8(<vscale x 128 x i8> %v, i64 32)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call <vscale x 64 x i8> @llvm.vector.extract.nxv64i8.nxv128i8(<vscale x 128 x i8> %v, i64 64)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.nxv128i8(<vscale x 128 x i8> %v, i64 2)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.nxv128i8(<vscale x 128 x i8> %v, i64 4)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.nxv128i8(<vscale x 128 x i8> %v, i64 8)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.nxv128i8(<vscale x 128 x i8> %v, i64 16)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.nxv128i8(<vscale x 128 x i8> %v, i64 32)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.nxv128i8(<vscale x 128 x i8> %v, i64 64)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.nxv128i8(<vscale x 128 x i8> %v, i64 128)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %scalable_mf8 = call <vscale x 1 x i8> @llvm.vector.extract.nxv1i8.nxv128i8(<vscale x 128 x i8> %v, i64 1)
+  %scalable_mf4 = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv128i8(<vscale x 128 x i8> %v, i64 2)
+  %scalable_mf2 = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv128i8(<vscale x 128 x i8> %v, i64 4)
+  %scalable_m1 = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv128i8(<vscale x 128 x i8> %v, i64 8)
+  %scalable_m2 = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv128i8(<vscale x 128 x i8> %v, i64 16)
+  %scalable_m4 = call <vscale x 32 x i8> @llvm.vector.extract.nxv32i8.nxv128i8(<vscale x 128 x i8> %v, i64 32)
+  %scalable_m8 = call <vscale x 64 x i8> @llvm.vector.extract.nxv64i8.nxv128i8(<vscale x 128 x i8> %v, i64 64)
+
+  %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.nxv128i8(<vscale x 128 x i8> %v, i64 2)
+  %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.nxv128i8(<vscale x 128 x i8> %v, i64 4)
+  %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.nxv128i8(<vscale x 128 x i8> %v, i64 8)
+  %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.nxv128i8(<vscale x 128 x i8> %v, i64 16)
+  %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.nxv128i8(<vscale x 128 x i8> %v, i64 32)
+  %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.nxv128i8(<vscale x 128 x i8> %v, i64 64)
+  %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.nxv128i8(<vscale x 128 x i8> %v, i64 128)
+  ret void
+}
+
+define void @vector_extract_v128i8_0(<128 x i8> %v) {
+; CHECK-LABEL: 'vector_extract_v128i8_0'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.v128i8(<128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.v128i8(<128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.v128i8(<128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.v128i8(<128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.v128i8(<128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.v128i8(<128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.v128i8(<128 x i8> %v, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vector_extract_v128i8_0'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.v128i8(<128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.v128i8(<128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.v128i8(<128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.v128i8(<128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.v128i8(<128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.v128i8(<128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.v128i8(<128 x i8> %v, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.v128i8(<128 x i8> %v, i64 0)
+  %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.v128i8(<128 x i8> %v, i64 0)
+  %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.v128i8(<128 x i8> %v, i64 0)
+  %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.v128i8(<128 x i8> %v, i64 0)
+  %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.v128i8(<128 x i8> %v, i64 0)
+  %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.v128i8(<128 x i8> %v, i64 0)
+  %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.v128i8(<128 x i8> %v, i64 0)
+  ret void
+}
+
+define void @vector_extract_v128i8_1(<128 x i8> %v) {
+; CHECK-LABEL: 'vector_extract_v128i8_1'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.v128i8(<128 x i8> %v, i64 2)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.v128i8(<128 x i8> %v, i64 4)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.v128i8(<128 x i8> %v, i64 8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.v128i8(<128 x i8> %v, i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.v128i8(<128 x i8> %v, i64 32)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.v128i8(<128 x i8> %v, i64 64)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vector_extract_v128i8_1'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.v128i8(<128 x i8> %v, i64 2)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.v128i8(<128 x i8> %v, i64 4)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.v128i8(<128 x i8> %v, i64 8)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.v128i8(<128 x i8> %v, i64 16)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.v128i8(<128 x i8> %v, i64 32)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.v128i8(<128 x i8> %v, i64 64)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.v128i8(<128 x i8> %v, i64 2)
+  %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.v128i8(<128 x i8> %v, i64 4)
+  %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.v128i8(<128 x i8> %v, i64 8)
+  %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.v128i8(<128 x i8> %v, i64 16)
+  %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.v128i8(<128 x i8> %v, i64 32)
+  %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.v128i8(<128 x i8> %v, i64 64)
+  ; No @llvm.vector.extract.v128i8.v128i8(<128 x i8> %v, i64 128) since it would overrun
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-vectorinsert.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-vectorinsert.ll
new file mode 100644
index 000000000000..7a9f45cb7af5
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-vectorinsert.ll
@@ -0,0 +1,335 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v -cost-kind=code-size < %s | FileCheck %s --check-prefix=SIZE
+
+define void @vector_insert_nxv128i8_0(<vscale x 128 x i8> %v) {
+; CHECK-LABEL: 'vector_insert_nxv128i8_0'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv1i8(<vscale x 128 x i8> %v, <vscale x 1 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv2i8(<vscale x 128 x i8> %v, <vscale x 2 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv4i8(<vscale x 128 x i8> %v, <vscale x 4 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv8i8(<vscale x 128 x i8> %v, <vscale x 8 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv16i8(<vscale x 128 x i8> %v, <vscale x 16 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv32i8(<vscale x 128 x i8> %v, <vscale x 32 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv64i8(<vscale x 128 x i8> %v, <vscale x 64 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v2i8(<vscale x 128 x i8> %v, <2 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v4i8(<vscale x 128 x i8> %v, <4 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v8i8(<vscale x 128 x i8> %v, <8 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v16i8(<vscale x 128 x i8> %v, <16 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v32i8(<vscale x 128 x i8> %v, <32 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v64i8(<vscale x 128 x i8> %v, <64 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v128i8(<vscale x 128 x i8> %v, <128 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vector_insert_nxv128i8_0'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv1i8(<vscale x 128 x i8> %v, <vscale x 1 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv2i8(<vscale x 128 x i8> %v, <vscale x 2 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv4i8(<vscale x 128 x i8> %v, <vscale x 4 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv8i8(<vscale x 128 x i8> %v, <vscale x 8 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv16i8(<vscale x 128 x i8> %v, <vscale x 16 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv32i8(<vscale x 128 x i8> %v, <vscale x 32 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv64i8(<vscale x 128 x i8> %v, <vscale x 64 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v2i8(<vscale x 128 x i8> %v, <2 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v4i8(<vscale x 128 x i8> %v, <4 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v8i8(<vscale x 128 x i8> %v, <8 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v16i8(<vscale x 128 x i8> %v, <16 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v32i8(<vscale x 128 x i8> %v, <32 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v64i8(<vscale x 128 x i8> %v, <64 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v128i8(<vscale x 128 x i8> %v, <128 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %scalable_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv1i8(<vscale x 128 x i8> %v, <vscale x 1 x i8> undef, i64 0)
+  %scalable_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv2i8(<vscale x 128 x i8> %v, <vscale x 2 x i8> undef, i64 0)
+  %scalable_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv4i8(<vscale x 128 x i8> %v, <vscale x 4 x i8> undef, i64 0)
+  %scalable_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv8i8(<vscale x 128 x i8> %v, <vscale x 8 x i8> undef, i64 0)
+  %scalable_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv16i8(<vscale x 128 x i8> %v, <vscale x 16 x i8> undef, i64 0)
+  %scalable_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv32i8(<vscale x 128 x i8> %v, <vscale x 32 x i8> undef, i64 0)
+  %scalable_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv64i8(<vscale x 128 x i8> %v, <vscale x 64 x i8> undef, i64 0)
+
+  %fixed_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v2i8(<vscale x 128 x i8> %v, <2 x i8> undef, i64 0)
+  %fixed_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v4i8(<vscale x 128 x i8> %v, <4 x i8> undef, i64 0)
+  %fixed_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v8i8(<vscale x 128 x i8> %v, <8 x i8> undef, i64 0)
+  %fixed_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v16i8(<vscale x 128 x i8> %v, <16 x i8> undef, i64 0)
+  %fixed_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v32i8(<vscale x 128 x i8> %v, <32 x i8> undef, i64 0)
+  %fixed_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v64i8(<vscale x 128 x i8> %v, <64 x i8> undef, i64 0)
+  %fixed_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v128i8(<vscale x 128 x i8> %v, <128 x i8> undef, i64 0)
+  ret void
+}
+
+define void @vector_insert_nxv128i8_undef_0() {
+; CHECK-LABEL: 'vector_insert_nxv128i8_undef_0'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv1i8(<vscale x 128 x i8> undef, <vscale x 1 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv2i8(<vscale x 128 x i8> undef, <vscale x 2 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv4i8(<vscale x 128 x i8> undef, <vscale x 4 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv8i8(<vscale x 128 x i8> undef, <vscale x 8 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv16i8(<vscale x 128 x i8> undef, <vscale x 16 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv32i8(<vscale x 128 x i8> undef, <vscale x 32 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv64i8(<vscale x 128 x i8> undef, <vscale x 64 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v2i8(<vscale x 128 x i8> undef, <2 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v4i8(<vscale x 128 x i8> undef, <4 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v8i8(<vscale x 128 x i8> undef, <8 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v16i8(<vscale x 128 x i8> undef, <16 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v32i8(<vscale x 128 x i8> undef, <32 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v64i8(<vscale x 128 x i8> undef, <64 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v128i8(<vscale x 128 x i8> undef, <128 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vector_insert_nxv128i8_undef_0'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv1i8(<vscale x 128 x i8> undef, <vscale x 1 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv2i8(<vscale x 128 x i8> undef, <vscale x 2 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv4i8(<vscale x 128 x i8> undef, <vscale x 4 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv8i8(<vscale x 128 x i8> undef, <vscale x 8 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv16i8(<vscale x 128 x i8> undef, <vscale x 16 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv32i8(<vscale x 128 x i8> undef, <vscale x 32 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv64i8(<vscale x 128 x i8> undef, <vscale x 64 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v2i8(<vscale x 128 x i8> undef, <2 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v4i8(<vscale x 128 x i8> undef, <4 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v8i8(<vscale x 128 x i8> undef, <8 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v16i8(<vscale x 128 x i8> undef, <16 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v32i8(<vscale x 128 x i8> undef, <32 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v64i8(<vscale x 128 x i8> undef, <64 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v128i8(<vscale x 128 x i8> undef, <128 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %scalable_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv1i8(<vscale x 128 x i8> undef, <vscale x 1 x i8> undef, i64 0)
+  %scalable_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv2i8(<vscale x 128 x i8> undef, <vscale x 2 x i8> undef, i64 0)
+  %scalable_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv4i8(<vscale x 128 x i8> undef, <vscale x 4 x i8> undef, i64 0)
+  %scalable_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv8i8(<vscale x 128 x i8> undef, <vscale x 8 x i8> undef, i64 0)
+  %scalable_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv16i8(<vscale x 128 x i8> undef, <vscale x 16 x i8> undef, i64 0)
+  %scalable_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv32i8(<vscale x 128 x i8> undef, <vscale x 32 x i8> undef, i64 0)
+  %scalable_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv64i8(<vscale x 128 x i8> undef, <vscale x 64 x i8> undef, i64 0)
+
+  %fixed_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v2i8(<vscale x 128 x i8> undef, <2 x i8> undef, i64 0)
+  %fixed_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v4i8(<vscale x 128 x i8> undef, <4 x i8> undef, i64 0)
+  %fixed_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v8i8(<vscale x 128 x i8> undef, <8 x i8> undef, i64 0)
+  %fixed_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v16i8(<vscale x 128 x i8> undef, <16 x i8> undef, i64 0)
+  %fixed_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v32i8(<vscale x 128 x i8> undef, <32 x i8> undef, i64 0)
+  %fixed_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v64i8(<vscale x 128 x i8> undef, <64 x i8> undef, i64 0)
+  %fixed_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v128i8(<vscale x 128 x i8> undef, <128 x i8> undef, i64 0)
+  ret void
+}
+
+define void @vector_insert_nxv128i8_1(<vscale x 128 x i8> %v) {
+; CHECK-LABEL: 'vector_insert_nxv128i8_1'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv1i8(<vscale x 128 x i8> %v, <vscale x 1 x i8> undef, i64 1)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv2i8(<vscale x 128 x i8> %v, <vscale x 2 x i8> undef, i64 2)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv4i8(<vscale x 128 x i8> %v, <vscale x 4 x i8> undef, i64 4)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv8i8(<vscale x 128 x i8> %v, <vscale x 8 x i8> undef, i64 8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv16i8(<vscale x 128 x i8> %v, <vscale x 16 x i8> undef, i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv32i8(<vscale x 128 x i8> %v, <vscale x 32 x i8> undef, i64 32)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv64i8(<vscale x 128 x i8> %v, <vscale x 64 x i8> undef, i64 64)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v2i8(<vscale x 128 x i8> %v, <2 x i8> undef, i64 2)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v4i8(<vscale x 128 x i8> %v, <4 x i8> undef, i64 4)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v8i8(<vscale x 128 x i8> %v, <8 x i8> undef, i64 8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v16i8(<vscale x 128 x i8> %v, <16 x i8> undef, i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v32i8(<vscale x 128 x i8> %v, <32 x i8> undef, i64 32)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v64i8(<vscale x 128 x i8> %v, <64 x i8> undef, i64 64)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v128i8(<vscale x 128 x i8> %v, <128 x i8> undef, i64 128)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vector_insert_nxv128i8_1'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv1i8(<vscale x 128 x i8> %v, <vscale x 1 x i8> undef, i64 1)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv2i8(<vscale x 128 x i8> %v, <vscale x 2 x i8> undef, i64 2)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv4i8(<vscale x 128 x i8> %v, <vscale x 4 x i8> undef, i64 4)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv8i8(<vscale x 128 x i8> %v, <vscale x 8 x i8> undef, i64 8)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv16i8(<vscale x 128 x i8> %v, <vscale x 16 x i8> undef, i64 16)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv32i8(<vscale x 128 x i8> %v, <vscale x 32 x i8> undef, i64 32)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv64i8(<vscale x 128 x i8> %v, <vscale x 64 x i8> undef, i64 64)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v2i8(<vscale x 128 x i8> %v, <2 x i8> undef, i64 2)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v4i8(<vscale x 128 x i8> %v, <4 x i8> undef, i64 4)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v8i8(<vscale x 128 x i8> %v, <8 x i8> undef, i64 8)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v16i8(<vscale x 128 x i8> %v, <16 x i8> undef, i64 16)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v32i8(<vscale x 128 x i8> %v, <32 x i8> undef, i64 32)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v64i8(<vscale x 128 x i8> %v, <64 x i8> undef, i64 64)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v128i8(<vscale x 128 x i8> %v, <128 x i8> undef, i64 128)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %scalable_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv1i8(<vscale x 128 x i8> %v, <vscale x 1 x i8> undef, i64 1)
+  %scalable_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv2i8(<vscale x 128 x i8> %v, <vscale x 2 x i8> undef, i64 2)
+  %scalable_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv4i8(<vscale x 128 x i8> %v, <vscale x 4 x i8> undef, i64 4)
+  %scalable_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv8i8(<vscale x 128 x i8> %v, <vscale x 8 x i8> undef, i64 8)
+  %scalable_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv16i8(<vscale x 128 x i8> %v, <vscale x 16 x i8> undef, i64 16)
+  %scalable_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv32i8(<vscale x 128 x i8> %v, <vscale x 32 x i8> undef, i64 32)
+  %scalable_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv64i8(<vscale x 128 x i8> %v, <vscale x 64 x i8> undef, i64 64)
+
+  %fixed_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v2i8(<vscale x 128 x i8> %v, <2 x i8> undef, i64 2)
+  %fixed_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v4i8(<vscale x 128 x i8> %v, <4 x i8> undef, i64 4)
+  %fixed_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v8i8(<vscale x 128 x i8> %v, <8 x i8> undef, i64 8)
+  %fixed_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v16i8(<vscale x 128 x i8> %v, <16 x i8> undef, i64 16)
+  %fixed_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v32i8(<vscale x 128 x i8> %v, <32 x i8> undef, i64 32)
+  %fixed_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v64i8(<vscale x 128 x i8> %v, <64 x i8> undef, i64 64)
+  %fixed_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v128i8(<vscale x 128 x i8> %v, <128 x i8> undef, i64 128)
+  ret void
+}
+
+define void @vector_insert_nxv128i8_undef_1() {
+; CHECK-LABEL: 'vector_insert_nxv128i8_undef_1'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv1i8(<vscale x 128 x i8> undef, <vscale x 1 x i8> undef, i64 1)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv2i8(<vscale x 128 x i8> undef, <vscale x 2 x i8> undef, i64 2)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv4i8(<vscale x 128 x i8> undef, <vscale x 4 x i8> undef, i64 4)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv8i8(<vscale x 128 x i8> undef, <vscale x 8 x i8> undef, i64 8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv16i8(<vscale x 128 x i8> undef, <vscale x 16 x i8> undef, i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv32i8(<vscale x 128 x i8> undef, <vscale x 32 x i8> undef, i64 32)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv64i8(<vscale x 128 x i8> undef, <vscale x 64 x i8> undef, i64 64)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v2i8(<vscale x 128 x i8> undef, <2 x i8> undef, i64 2)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v4i8(<vscale x 128 x i8> undef, <4 x i8> undef, i64 4)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v8i8(<vscale x 128 x i8> undef, <8 x i8> undef, i64 8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v16i8(<vscale x 128 x i8> undef, <16 x i8> undef, i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v32i8(<vscale x 128 x i8> undef, <32 x i8> undef, i64 32)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v64i8(<vscale x 128 x i8> undef, <64 x i8> undef, i64 64)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fixed_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v128i8(<vscale x 128 x i8> undef, <128 x i8> undef, i64 128)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vector_insert_nxv128i8_undef_1'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv1i8(<vscale x 128 x i8> undef, <vscale x 1 x i8> undef, i64 1)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv2i8(<vscale x 128 x i8> undef, <vscale x 2 x i8> undef, i64 2)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv4i8(<vscale x 128 x i8> undef, <vscale x 4 x i8> undef, i64 4)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv8i8(<vscale x 128 x i8> undef, <vscale x 8 x i8> undef, i64 8)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv16i8(<vscale x 128 x i8> undef, <vscale x 16 x i8> undef, i64 16)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv32i8(<vscale x 128 x i8> undef, <vscale x 32 x i8> undef, i64 32)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv64i8(<vscale x 128 x i8> undef, <vscale x 64 x i8> undef, i64 64)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v2i8(<vscale x 128 x i8> undef, <2 x i8> undef, i64 2)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v4i8(<vscale x 128 x i8> undef, <4 x i8> undef, i64 4)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v8i8(<vscale x 128 x i8> undef, <8 x i8> undef, i64 8)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v16i8(<vscale x 128 x i8> undef, <16 x i8> undef, i64 16)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v32i8(<vscale x 128 x i8> undef, <32 x i8> undef, i64 32)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v64i8(<vscale x 128 x i8> undef, <64 x i8> undef, i64 64)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fixed_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v128i8(<vscale x 128 x i8> undef, <128 x i8> undef, i64 128)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %scalable_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv1i8(<vscale x 128 x i8> undef, <vscale x 1 x i8> undef, i64 1)
+  %scalable_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv2i8(<vscale x 128 x i8> undef, <vscale x 2 x i8> undef, i64 2)
+  %scalable_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv4i8(<vscale x 128 x i8> undef, <vscale x 4 x i8> undef, i64 4)
+  %scalable_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv8i8(<vscale x 128 x i8> undef, <vscale x 8 x i8> undef, i64 8)
+  %scalable_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv16i8(<vscale x 128 x i8> undef, <vscale x 16 x i8> undef, i64 16)
+  %scalable_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv32i8(<vscale x 128 x i8> undef, <vscale x 32 x i8> undef, i64 32)
+  %scalable_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.nxv64i8(<vscale x 128 x i8> undef, <vscale x 64 x i8> undef, i64 64)
+
+  %fixed_mf8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v2i8(<vscale x 128 x i8> undef, <2 x i8> undef, i64 2)
+  %fixed_mf4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v4i8(<vscale x 128 x i8> undef, <4 x i8> undef, i64 4)
+  %fixed_mf2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v8i8(<vscale x 128 x i8> undef, <8 x i8> undef, i64 8)
+  %fixed_m1 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v16i8(<vscale x 128 x i8> undef, <16 x i8> undef, i64 16)
+  %fixed_m2 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v32i8(<vscale x 128 x i8> undef, <32 x i8> undef, i64 32)
+  %fixed_m4 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v64i8(<vscale x 128 x i8> undef, <64 x i8> undef, i64 64)
+  %fixed_m8 = call <vscale x 128 x i8> @llvm.vector.insert.nxv128i8.v128i8(<vscale x 128 x i8> undef, <128 x i8> undef, i64 128)
+  ret void
+}
+
+define void @vector_insert_v128i8_0(<128 x i8> %v) {
+; CHECK-LABEL: 'vector_insert_v128i8_0'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> %v, <2 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> %v, <4 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> %v, <8 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> %v, <16 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> %v, <32 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> %v, <64 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.insert.v128i8.v128i8(<128 x i8> %v, <128 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vector_insert_v128i8_0'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> %v, <2 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> %v, <4 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> %v, <8 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> %v, <16 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> %v, <32 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> %v, <64 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.insert.v128i8.v128i8(<128 x i8> %v, <128 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> %v, <2 x i8> undef, i64 0)
+  %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> %v, <4 x i8> undef, i64 0)
+  %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> %v, <8 x i8> undef, i64 0)
+  %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> %v, <16 x i8> undef, i64 0)
+  %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> %v, <32 x i8> undef, i64 0)
+  %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> %v, <64 x i8> undef, i64 0)
+  %fixed_m8 = call <128 x i8> @llvm.vector.insert.v128i8.v128i8(<128 x i8> %v, <128 x i8> undef, i64 0)
+  ret void
+}
+
+define void @vector_insert_v128i8_undef_0() {
+; CHECK-LABEL: 'vector_insert_v128i8_undef_0'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> undef, <2 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> undef, <4 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> undef, <8 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> undef, <16 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> undef, <32 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> undef, <64 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.insert.v128i8.v128i8(<128 x i8> undef, <128 x i8> undef, i64 0)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vector_insert_v128i8_undef_0'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> undef, <2 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> undef, <4 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> undef, <8 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> undef, <16 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> undef, <32 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> undef, <64 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.insert.v128i8.v128i8(<128 x i8> undef, <128 x i8> undef, i64 0)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> undef, <2 x i8> undef, i64 0)
+  %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> undef, <4 x i8> undef, i64 0)
+  %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> undef, <8 x i8> undef, i64 0)
+  %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> undef, <16 x i8> undef, i64 0)
+  %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> undef, <32 x i8> undef, i64 0)
+  %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> undef, <64 x i8> undef, i64 0)
+  %fixed_m8 = call <128 x i8> @llvm.vector.insert.v128i8.v128i8(<128 x i8> undef, <128 x i8> undef, i64 0)
+  ret void
+}
+
+define void @vector_insert_v128i8_1(<128 x i8> %v) {
+; CHECK-LABEL: 'vector_insert_v128i8_1'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> %v, <2 x i8> undef, i64 2)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> %v, <4 x i8> undef, i64 4)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> %v, <8 x i8> undef, i64 8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> %v, <16 x i8> undef, i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> %v, <32 x i8> undef, i64 32)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> %v, <64 x i8> undef, i64 64)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vector_insert_v128i8_1'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> %v, <2 x i8> undef, i64 2)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> %v, <4 x i8> undef, i64 4)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> %v, <8 x i8> undef, i64 8)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> %v, <16 x i8> undef, i64 16)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> %v, <32 x i8> undef, i64 32)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> %v, <64 x i8> undef, i64 64)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> %v, <2 x i8> undef, i64 2)
+  %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> %v, <4 x i8> undef, i64 4)
+  %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> %v, <8 x i8> undef, i64 8)
+  %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> %v, <16 x i8> undef, i64 16)
+  %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> %v, <32 x i8> undef, i64 32)
+  %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> %v, <64 x i8> undef, i64 64)
+  ; No @llvm.vector.insert.v128i8.v128i8(<128 x i8> %v, <128 x i8> undef, i64 128) since it would overrun
+  ret void
+}
+
+define void @vector_insert_v128i8_undef_1() {
+; CHECK-LABEL: 'vector_insert_v128i8_undef_1'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> undef, <2 x i8> undef, i64 2)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> undef, <4 x i8> undef, i64 4)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> undef, <8 x i8> undef, i64 8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> undef, <16 x i8> undef, i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> undef, <32 x i8> undef, i64 32)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> undef, <64 x i8> undef, i64 64)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vector_insert_v128i8_undef_1'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> undef, <2 x i8> undef, i64 2)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> undef, <4 x i8> undef, i64 4)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> undef, <8 x i8> undef, i64 8)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> undef, <16 x i8> undef, i64 16)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> undef, <32 x i8> undef, i64 32)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> undef, <64 x i8> undef, i64 64)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> undef, <2 x i8> undef, i64 2)
+  %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> undef, <4 x i8> undef, i64 4)
+  %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> undef, <8 x i8> undef, i64 8)
+  %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> undef, <16 x i8> undef, i64 16)
+  %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> undef, <32 x i8> undef, i64 32)
+  %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> undef, <64 x i8> undef, i64 64)
+  ; No @llvm.vector.insert.v128i8.v128i8(<128 x i8> undef, <128 x i8> undef, i64 128) since it would overrun
+  ret void
+}
author	Luke Lau <luke@igalia.com>	2024-02-15 11:48:38 +0800
committer	Luke Lau <luke@igalia.com>	2024-02-15 12:17:27 +0800
commit	fc0b67e1d79d1f199687f8f06d619984d9520230 (patch)
tree	49265d916ec694df7f43ad2423868a5397b1f7df
parent	fa9e297b8b63dacb962d99814e698658ad71f946 (diff)