summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David Green <david.green@arm.com> 2023-11-18 20:09:58 +0000
committer: David Green <david.green@arm.com> 2023-11-18 20:09:58 +0000
commit: 396e650ef35659e40f5c5c37b942fb447d2bef69 (patch)
tree: 16881950b821ff5117a0ad8e9af8723b17d48831
parent: a540808de254b18b304aa0915638a0900b36d9fa (diff)
[AArch64] Add some testing for BE shuffles. NFC
-rw-r--r-- llvm/test/CodeGen/AArch64/shuffles.ll 604
1 files changed, 452 insertions, 152 deletions
diff --git a/llvm/test/CodeGen/AArch64/shuffles.ll b/llvm/test/CodeGen/AArch64/shuffles.ll
index 51d51e5a7834..41dd7f06712d 100644
--- a/llvm/test/CodeGen/AArch64/shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/shuffles.ll
@@ -1,273 +1,573 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CHECKLE
+; RUN: llc < %s -mtriple=aarch64_be--linux-gnu | FileCheck %s --check-prefix=CHECKBE
; test_shuf1: irregular 16-lane i32 shuffle of two <16 x i32> inputs.
; Mask indices 0-15 select lanes of %x and 16-31 select lanes of %y.
; Expected codegen is pinned separately for the little-endian and big-endian
; llc runs.
define <16 x i32> @test_shuf1(<16 x i32> %x, <16 x i32> %y) {
-; CHECK-LABEL: test_shuf1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v3.16b, v6.16b, v1.16b, #4
-; CHECK-NEXT: uzp1 v5.4s, v1.4s, v0.4s
-; CHECK-NEXT: uzp2 v16.4s, v2.4s, v4.4s
-; CHECK-NEXT: dup v17.4s, v4.s[0]
-; CHECK-NEXT: trn2 v4.4s, v1.4s, v3.4s
-; CHECK-NEXT: mov v17.s[0], v6.s[3]
-; CHECK-NEXT: trn2 v1.4s, v5.4s, v1.4s
-; CHECK-NEXT: rev64 v3.4s, v7.4s
-; CHECK-NEXT: trn1 v2.4s, v16.4s, v2.4s
-; CHECK-NEXT: mov v4.s[0], v7.s[1]
-; CHECK-NEXT: ext v1.16b, v0.16b, v1.16b, #12
-; CHECK-NEXT: mov v3.d[0], v17.d[0]
-; CHECK-NEXT: mov v2.s[3], v7.s[0]
-; CHECK-NEXT: mov v0.16b, v4.16b
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf1:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: ext v3.16b, v6.16b, v1.16b, #4
+; CHECKLE-NEXT: uzp1 v5.4s, v1.4s, v0.4s
+; CHECKLE-NEXT: uzp2 v16.4s, v2.4s, v4.4s
+; CHECKLE-NEXT: dup v17.4s, v4.s[0]
+; CHECKLE-NEXT: trn2 v4.4s, v1.4s, v3.4s
+; CHECKLE-NEXT: mov v17.s[0], v6.s[3]
+; CHECKLE-NEXT: trn2 v1.4s, v5.4s, v1.4s
+; CHECKLE-NEXT: rev64 v3.4s, v7.4s
+; CHECKLE-NEXT: trn1 v2.4s, v16.4s, v2.4s
+; CHECKLE-NEXT: mov v4.s[0], v7.s[1]
+; CHECKLE-NEXT: ext v1.16b, v0.16b, v1.16b, #12
+; CHECKLE-NEXT: mov v3.d[0], v17.d[0]
+; CHECKLE-NEXT: mov v2.s[3], v7.s[0]
+; CHECKLE-NEXT: mov v0.16b, v4.16b
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf1:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v1.4s, v1.4s
+; CHECKBE-NEXT: rev64 v3.4s, v6.4s
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: rev64 v2.4s, v2.4s
+; CHECKBE-NEXT: rev64 v4.4s, v4.4s
+; CHECKBE-NEXT: rev64 v5.4s, v7.4s
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECKBE-NEXT: ext v4.16b, v4.16b, v4.16b, #8
+; CHECKBE-NEXT: ext v5.16b, v5.16b, v5.16b, #8
+; CHECKBE-NEXT: ext v6.16b, v3.16b, v1.16b, #4
+; CHECKBE-NEXT: uzp1 v16.4s, v1.4s, v0.4s
+; CHECKBE-NEXT: uzp2 v7.4s, v2.4s, v4.4s
+; CHECKBE-NEXT: dup v4.4s, v4.s[0]
+; CHECKBE-NEXT: rev64 v17.4s, v5.4s
+; CHECKBE-NEXT: trn2 v6.4s, v1.4s, v6.4s
+; CHECKBE-NEXT: mov v4.s[0], v3.s[3]
+; CHECKBE-NEXT: trn2 v1.4s, v16.4s, v1.4s
+; CHECKBE-NEXT: trn1 v2.4s, v7.4s, v2.4s
+; CHECKBE-NEXT: rev64 v3.4s, v17.4s
+; CHECKBE-NEXT: mov v6.s[0], v5.s[1]
+; CHECKBE-NEXT: rev64 v4.4s, v4.4s
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v1.16b, #12
+; CHECKBE-NEXT: mov v2.s[3], v5.s[0]
+; CHECKBE-NEXT: rev64 v1.4s, v6.4s
+; CHECKBE-NEXT: mov v3.d[0], v4.d[0]
+; CHECKBE-NEXT: rev64 v4.4s, v0.4s
+; CHECKBE-NEXT: rev64 v2.4s, v2.4s
+; CHECKBE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v4.16b, v4.16b, #8
+; CHECKBE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECKBE-NEXT: ret
%s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 29, i32 26, i32 7, i32 4, i32 3, i32 6, i32 5, i32 2, i32 9, i32 8, i32 17, i32 28, i32 27, i32 16, i32 31, i32 30>
ret <16 x i32> %s3
}
; test_shuf2: 4-lane result taken from two <16 x i32> inputs with mask
; <29, 26, 7, 4>, the same indices as result lanes 0-3 of test_shuf1's mask.
define <4 x i32> @test_shuf2(<16 x i32> %x, <16 x i32> %y) {
-; CHECK-LABEL: test_shuf2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: zip2 v0.4s, v7.4s, v6.4s
-; CHECK-NEXT: trn2 v2.4s, v7.4s, v0.4s
-; CHECK-NEXT: ext v0.16b, v1.16b, v1.16b, #4
-; CHECK-NEXT: mov v0.d[0], v2.d[0]
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf2:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: zip2 v0.4s, v7.4s, v6.4s
+; CHECKLE-NEXT: trn2 v2.4s, v7.4s, v0.4s
+; CHECKLE-NEXT: ext v0.16b, v1.16b, v1.16b, #4
+; CHECKLE-NEXT: mov v0.d[0], v2.d[0]
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf2:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v0.4s, v6.4s
+; CHECKBE-NEXT: rev64 v2.4s, v7.4s
+; CHECKBE-NEXT: rev64 v1.4s, v1.4s
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: zip2 v0.4s, v2.4s, v0.4s
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #4
+; CHECKBE-NEXT: trn2 v0.4s, v2.4s, v0.4s
+; CHECKBE-NEXT: rev64 v1.4s, v1.4s
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: mov v1.d[0], v0.d[0]
+; CHECKBE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ret
%s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 29, i32 26, i32 7, i32 4>
ret <4 x i32> %s3
}
; test_shuf3: 4-lane result with mask <3, 6, 5, 2>, the same indices as result
; lanes 4-7 of test_shuf1's mask. All indices are below 16, so only %x is read.
define <4 x i32> @test_shuf3(<16 x i32> %x, <16 x i32> %y) {
-; CHECK-LABEL: test_shuf3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 v2.4s, v1.4s, v0.4s
-; CHECK-NEXT: trn2 v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf3:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: uzp1 v2.4s, v1.4s, v0.4s
+; CHECKLE-NEXT: trn2 v1.4s, v2.4s, v1.4s
+; CHECKLE-NEXT: ext v0.16b, v0.16b, v1.16b, #12
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf3:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: rev64 v1.4s, v1.4s
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: uzp1 v2.4s, v1.4s, v0.4s
+; CHECKBE-NEXT: trn2 v1.4s, v2.4s, v1.4s
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v1.16b, #12
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
%s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 3, i32 6, i32 5, i32 2>
ret <4 x i32> %s3
}
; test_shuf4: 4-lane result with mask <9, 8, 17, 28>, the same indices as
; result lanes 8-11 of test_shuf1's mask (two lanes from %x, two from %y).
define <4 x i32> @test_shuf4(<16 x i32> %x, <16 x i32> %y) {
-; CHECK-LABEL: test_shuf4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp2 v0.4s, v2.4s, v4.4s
-; CHECK-NEXT: trn1 v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: mov v0.s[3], v7.s[0]
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf4:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: uzp2 v0.4s, v2.4s, v4.4s
+; CHECKLE-NEXT: trn1 v0.4s, v0.4s, v2.4s
+; CHECKLE-NEXT: mov v0.s[3], v7.s[0]
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf4:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v0.4s, v4.4s
+; CHECKBE-NEXT: rev64 v1.4s, v2.4s
+; CHECKBE-NEXT: rev64 v2.4s, v7.4s
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECKBE-NEXT: uzp2 v0.4s, v1.4s, v0.4s
+; CHECKBE-NEXT: trn1 v0.4s, v0.4s, v1.4s
+; CHECKBE-NEXT: mov v0.s[3], v2.s[0]
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
%s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 9, i32 8, i32 17, i32 28>
ret <4 x i32> %s3
}
; test_shuf5: 4-lane result with mask <27, 16, 31, 30>, the same indices as
; result lanes 12-15 of test_shuf1's mask. All indices are 16 or above, so only
; %y is read.
define <4 x i32> @test_shuf5(<16 x i32> %x, <16 x i32> %y) {
-; CHECK-LABEL: test_shuf5:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v6.16b, v4.16b, #12
-; CHECK-NEXT: rev64 v0.4s, v7.4s
-; CHECK-NEXT: mov v0.d[0], v1.d[0]
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf5:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: ext v1.16b, v6.16b, v4.16b, #12
+; CHECKLE-NEXT: rev64 v0.4s, v7.4s
+; CHECKLE-NEXT: mov v0.d[0], v1.d[0]
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf5:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v0.4s, v7.4s
+; CHECKBE-NEXT: rev64 v1.4s, v4.4s
+; CHECKBE-NEXT: rev64 v2.4s, v6.4s
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v2.16b, v1.16b, #12
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: rev64 v1.4s, v1.4s
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: mov v0.d[0], v1.d[0]
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
%s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 27, i32 16, i32 31, i32 30>
ret <4 x i32> %s3
}
; test1503: two-input <4 x i32> shuffle with mask <1, 5, 0, 3>; the function
; name spells the mask digits. Indices 0-3 select %a, 4-7 select %b.
define <4 x i32> @test1503(<4 x i32> %a, <4 x i32> %b)
-; CHECK-LABEL: test1503:
-; CHECK: // %bb.0:
-; CHECK-NEXT: zip1 v1.4s, v0.4s, v1.4s
-; CHECK-NEXT: ext v1.16b, v1.16b, v0.16b, #8
-; CHECK-NEXT: mov v1.s[3], v0.s[3]
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test1503:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: zip1 v1.4s, v0.4s, v1.4s
+; CHECKLE-NEXT: ext v1.16b, v1.16b, v0.16b, #8
+; CHECKLE-NEXT: mov v1.s[3], v0.s[3]
+; CHECKLE-NEXT: mov v0.16b, v1.16b
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test1503:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v1.4s, v1.4s
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: zip1 v1.4s, v0.4s, v1.4s
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v0.16b, #8
+; CHECKBE-NEXT: mov v1.s[3], v0.s[3]
+; CHECKBE-NEXT: rev64 v0.4s, v1.4s
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
{
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 3>
ret <4 x i32> %r
}
; test4366: two-input <4 x i32> shuffle with mask <4, 3, 6, 6>; the function
; name spells the mask digits. Only lane 3 comes from %a, the rest from %b.
define <4 x i32> @test4366(<4 x i32> %a, <4 x i32> %b)
-; CHECK-LABEL: test4366:
-; CHECK: // %bb.0:
-; CHECK-NEXT: trn1 v1.4s, v1.4s, v1.4s
-; CHECK-NEXT: mov v1.s[1], v0.s[3]
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test4366:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: trn1 v1.4s, v1.4s, v1.4s
+; CHECKLE-NEXT: mov v1.s[1], v0.s[3]
+; CHECKLE-NEXT: mov v0.16b, v1.16b
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test4366:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v1.4s, v1.4s
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: trn1 v1.4s, v1.4s, v1.4s
+; CHECKBE-NEXT: mov v1.s[1], v0.s[3]
+; CHECKBE-NEXT: rev64 v0.4s, v1.4s
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
{
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 3, i32 6, i32 6>
ret <4 x i32> %r
}
; test7367: two-input <4 x i32> shuffle with mask <7, 3, 6, 7>; the function
; name spells the mask digits. Only lane 3 comes from %a, the rest from %b.
define <4 x i32> @test7367(<4 x i32> %a, <4 x i32> %b)
-; CHECK-LABEL: test7367:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov v2.16b, v1.16b
-; CHECK-NEXT: mov v2.d[0], v0.d[1]
-; CHECK-NEXT: mov v2.s[0], v1.s[3]
-; CHECK-NEXT: mov v0.16b, v2.16b
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test7367:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: mov v2.16b, v1.16b
+; CHECKLE-NEXT: mov v2.d[0], v0.d[1]
+; CHECKLE-NEXT: mov v2.s[0], v1.s[3]
+; CHECKLE-NEXT: mov v0.16b, v2.16b
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test7367:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: rev64 v1.4s, v1.4s
+; CHECKBE-NEXT: mov v2.d[0], v0.d[1]
+; CHECKBE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: rev64 v1.4s, v2.4s
+; CHECKBE-NEXT: mov v1.s[0], v0.s[3]
+; CHECKBE-NEXT: rev64 v0.4s, v1.4s
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
{
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 3, i32 6, i32 7>
ret <4 x i32> %r
}
; test4045: two-input <4 x i32> shuffle with mask <4, 0, 4, 5>; the function
; name spells the mask digits. Only lane 0 comes from %a, the rest from %b.
define <4 x i32> @test4045(<4 x i32> %a, <4 x i32> %b)
-; CHECK-LABEL: test4045:
-; CHECK: // %bb.0:
-; CHECK-NEXT: trn1 v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test4045:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: trn1 v0.4s, v1.4s, v0.4s
+; CHECKLE-NEXT: mov v0.d[1], v1.d[0]
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test4045:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: rev64 v2.4s, v1.4s
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECKBE-NEXT: trn1 v0.4s, v2.4s, v0.4s
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: mov v0.d[1], v1.d[0]
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
{
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 4, i32 5>
ret <4 x i32> %r
}
; test0067: two-input <4 x i32> shuffle with mask <0, 0, 6, 7>; the function
; name spells the mask digits. Low half repeats %a lane 0, high half is the
; high half of %b.
define <4 x i32> @test0067(<4 x i32> %a, <4 x i32> %b)
-; CHECK-LABEL: test0067:
-; CHECK: // %bb.0:
-; CHECK-NEXT: trn1 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: mov v0.d[1], v1.d[1]
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test0067:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: trn1 v0.4s, v0.4s, v0.4s
+; CHECKLE-NEXT: mov v0.d[1], v1.d[1]
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test0067:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: trn1 v0.4s, v0.4s, v0.4s
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: mov v0.d[1], v1.d[1]
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
{
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 7>
ret <4 x i32> %r
}
; test_shuf6: two-input <4 x i32> shuffle with mask <0, 0, 7, 7>: %a lane 0
; duplicated into the low two lanes, %b lane 3 duplicated into the high two.
define <4 x i32> @test_shuf6(<4 x i32> %a, <4 x i32> %b)
-; CHECK-LABEL: test_shuf6:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov v0.s[2], v1.s[3]
-; CHECK-NEXT: trn1 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf6:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: mov v0.s[2], v1.s[3]
+; CHECKLE-NEXT: trn1 v0.4s, v0.4s, v0.4s
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf6:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v1.4s, v1.4s
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: mov v0.s[2], v1.s[3]
+; CHECKBE-NEXT: trn1 v0.4s, v0.4s, v0.4s
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
{
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
ret <4 x i32> %r
}
; test_shuf7: same <0, 0, 7, 7> mask as test_shuf6, applied to 64-bit
; <4 x i16> vectors instead of 128-bit <4 x i32> vectors.
define <4 x i16> @test_shuf7(<4 x i16> %a, <4 x i16> %b)
-; CHECK-LABEL: test_shuf7:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: mov v0.h[2], v1.h[3]
-; CHECK-NEXT: trn1 v0.4h, v0.4h, v0.4h
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf7:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECKLE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECKLE-NEXT: mov v0.h[2], v1.h[3]
+; CHECKLE-NEXT: trn1 v0.4h, v0.4h, v0.4h
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf7:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v1.4h, v1.4h
+; CHECKBE-NEXT: rev64 v0.4h, v0.4h
+; CHECKBE-NEXT: mov v0.h[2], v1.h[3]
+; CHECKBE-NEXT: trn1 v0.4h, v0.4h, v0.4h
+; CHECKBE-NEXT: rev64 v0.4h, v0.4h
+; CHECKBE-NEXT: ret
{
%r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
ret <4 x i16> %r
}
; test_shuf8: <8 x i8> shuffle with mask <0,0,0,0,8,8,8,8>: %a lane 0 in the
; low four lanes, %b lane 0 in the high four. The expected output uses a tbl
; with an index vector loaded from the constant pool.
define <8 x i8> @test_shuf8(<8 x i8> %a, <8 x i8> %b)
-; CHECK-LABEL: test_shuf8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: adrp x8, .LCPI12_0
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI12_0]
-; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf8:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECKLE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECKLE-NEXT: adrp x8, .LCPI12_0
+; CHECKLE-NEXT: mov v0.d[1], v1.d[0]
+; CHECKLE-NEXT: ldr d1, [x8, :lo12:.LCPI12_0]
+; CHECKLE-NEXT: tbl v0.8b, { v0.16b }, v1.8b
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf8:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v0.8b, v0.8b
+; CHECKBE-NEXT: rev64 v1.8b, v1.8b
+; CHECKBE-NEXT: adrp x8, .LCPI12_0
+; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI12_0
+; CHECKBE-NEXT: mov v0.d[1], v1.d[0]
+; CHECKBE-NEXT: ld1 { v1.8b }, [x8]
+; CHECKBE-NEXT: tbl v0.8b, { v0.16b }, v1.8b
+; CHECKBE-NEXT: rev64 v0.8b, v0.8b
+; CHECKBE-NEXT: ret
{
%r = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
ret <8 x i8> %r
}
; test_shuf9: <8 x i16> shuffle with mask <0,0,0,0,8,8,8,8>: %a lane 0 in the
; low four lanes, %b lane 0 in the high four. The expected output uses a
; two-register tbl with an index vector loaded from the constant pool.
define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b)
-; CHECK-LABEL: test_shuf9:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI13_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf9:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: adrp x8, .LCPI13_0
+; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
+; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf9:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v1.16b, v1.16b
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: adrp x8, .LCPI13_0
+; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI13_0
+; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
{
%r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
ret <8 x i16> %r
}
; test_shuf10: <16 x i8> shuffle whose mask uses only indices 0 and 8, both of
; which fall in %a (indices below 16), so %b is not read. The expected output
; uses a single-register tbl with an index vector from the constant pool.
define <16 x i8> @test_shuf10(<16 x i8> %a, <16 x i8> %b)
-; CHECK-LABEL: test_shuf10:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI14_0
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf10:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: adrp x8, .LCPI14_0
+; CHECKLE-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECKLE-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf10:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: adrp x8, .LCPI14_0
+; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI14_0
+; CHECKBE-NEXT: ld1 { v1.16b }, [x8]
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
{
%r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 >
ret <16 x i8> %r
}
; test_shuf11: <8 x half> shuffle with mask <0,0,0,0,8,8,8,8>, the same mask
; as test_shuf9 but on a floating-point element type. The expected output uses
; a two-register tbl with an index vector loaded from the constant pool.
define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b)
-; CHECK-LABEL: test_shuf11:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI15_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf11:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: adrp x8, .LCPI15_0
+; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
+; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf11:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v1.16b, v1.16b
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: adrp x8, .LCPI15_0
+; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI15_0
+; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
{
%r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
ret <8 x half> %r
}
; test_shuf12: <8 x half> shuffle with mask <0,0,0,0,0,8,1,15>: mostly %a
; lane 0, with %b lanes 0 and 7 (indices 8 and 15) mixed in. The expected
; output uses a two-register tbl with a constant-pool index vector.
define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b)
-; CHECK-LABEL: test_shuf12:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI16_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf12:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: adrp x8, .LCPI16_0
+; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI16_0]
+; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf12:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v1.16b, v1.16b
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: adrp x8, .LCPI16_0
+; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI16_0
+; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
{
%r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 1, i32 15>
ret <8 x half> %r
}
; test_shuf13: <8 x half> shuffle with mask <1,2,0,0,0,8,1,15>; differs from
; test_shuf12 only in the first two mask entries. The expected output uses a
; two-register tbl with a constant-pool index vector.
define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b)
-; CHECK-LABEL: test_shuf13:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI17_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf13:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: adrp x8, .LCPI17_0
+; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI17_0]
+; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf13:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v1.16b, v1.16b
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: adrp x8, .LCPI17_0
+; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI17_0
+; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
{
%r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 0, i32 0, i32 0, i32 8, i32 1, i32 15>
ret <8 x half> %r
}
; test_shuf14: <8 x half> shuffle with mask <1,2,1,1,0,8,1,15>; differs from
; test_shuf13 only in mask entries 2 and 3. The expected output uses a
; two-register tbl with a constant-pool index vector.
define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b)
-; CHECK-LABEL: test_shuf14:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI18_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf14:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: adrp x8, .LCPI18_0
+; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI18_0]
+; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf14:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v1.16b, v1.16b
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: adrp x8, .LCPI18_0
+; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI18_0
+; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
{
%r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 1, i32 1, i32 0, i32 8, i32 1, i32 15>
ret <8 x half> %r
}
; test_shuf15: <8 x half> shuffle with mask <1,2,7,2,0,3,2,15>: seven lanes
; come from %a and only the last (index 15) from %b. The expected output uses
; a two-register tbl with a constant-pool index vector.
define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b)
-; CHECK-LABEL: test_shuf15:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI19_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI19_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: test_shuf15:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: adrp x8, .LCPI19_0
+; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI19_0]
+; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_shuf15:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v1.16b, v1.16b
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: adrp x8, .LCPI19_0
+; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI19_0
+; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
{
%r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 7, i32 2, i32 0, i32 3, i32 2, i32 15>
ret <8 x half> %r
}
define <4 x i32> @extract_shuffle(<8 x i16> %j, <4 x i16> %k) {
-; CHECK-LABEL: extract_shuffle:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ushll2 v0.4s, v0.8h, #3
-; CHECK-NEXT: ret
+; CHECKLE-LABEL: extract_shuffle:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: ushll2 v0.4s, v0.8h, #3
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: extract_shuffle:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v0.8h, v0.8h
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ushll2 v0.4s, v0.8h, #3
+; CHECKBE-NEXT: rev64 v0.4s, v0.4s
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ret
%a = shufflevector <8 x i16> %j, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
%b = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%c = zext <4 x i16> %b to <4 x i32>