author    Craig Topper <craig.topper@sifive.com>    2024-05-01 14:55:14 -0700
committer GitHub <noreply@github.com>               2024-05-01 14:55:14 -0700
commit    41466a177a95ee6ff699d190f7625f0b32922a20 (patch)
tree      3c41c02b8daf30acaf6b3d176adbef5a31703941
parent    c2d892668b7fc296a37679a6b729f40ce08a6179 (diff)
[SelectionDAG] Correct the implementation of m_AllOnes. (#90776)
Previously we used SpecificInt_match, which created a 64 bit APInt containing all ones. This was then checked against other constants by using APInt::isSameValue. If the constants have different bitwidths, APInt::isSameValue will zero extend the narrower constant so the widths match. This means for any constant less than 64 bits, m_AllOnes was guaranteed to fail, since the zero extended value would not match all ones. I think we would also incorrectly consider an i128 with 64 leading zeros and 64 trailing ones as matching m_AllOnes. To avoid this, this patch adds a new matcher class that just calls isAllOnesOrAllOnesSplat.
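
As a standalone illustration (not part of the patch), the failure mode the message describes can be reproduced with llvm::APInt directly; the program and variable names below are hypothetical:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  // The 64-bit all-ones pattern the old matcher compared against.
  APInt Wide(64, ~0ULL);
  // An i8 constant -1: all ones at its own bitwidth.
  APInt Narrow(8, 0xFF);
  // isSameValue zero extends the narrower operand, so 0xFF becomes
  // 0x00000000000000FF and can never equal 0xFFFFFFFFFFFFFFFF.
  assert(!APInt::isSameValue(Wide, Narrow));
  // A width-aware check recognizes the i8 value as all ones, which is the
  // property isAllOnesOrAllOnesSplat tests for scalar constants.
  assert(Narrow.isAllOnes());
  // The i128 case from the message: zero extending the 64-bit pattern yields
  // 64 leading zeros and 64 trailing ones, so this spurious match succeeded.
  APInt Low64Ones(128, ~0ULL); // i128 with only the low 64 bits set
  assert(APInt::isSameValue(Wide, Low64Ones));
  return 0;
}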
-rw-r--r--  llvm/include/llvm/CodeGen/SDPatternMatch.h                    |  14
-rw-r--r--  llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll  | 478
2 files changed, 105 insertions(+), 387 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 4cc7bb9c3b55..c581eb7a60aa 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -716,7 +716,17 @@ inline SpecificInt_match m_SpecificInt(uint64_t V) {
inline SpecificInt_match m_Zero() { return m_SpecificInt(0U); }
inline SpecificInt_match m_One() { return m_SpecificInt(1U); }
-inline SpecificInt_match m_AllOnes() { return m_SpecificInt(~0U); }
+
+struct AllOnes_match {
+
+  AllOnes_match() = default;
+
+  template <typename MatchContext> bool match(const MatchContext &, SDValue N) {
+    return isAllOnesOrAllOnesSplat(N);
+  }
+};
+
+inline AllOnes_match m_AllOnes() { return AllOnes_match(); }
/// Match true boolean value based on the information provided by
/// TargetLowering.
@@ -766,7 +776,7 @@ inline BinaryOpc_match<SpecificInt_match, ValTy> m_Neg(const ValTy &V) {
/// Match a Not as a xor(v, -1) or xor(-1, v)
template <typename ValTy>
-inline BinaryOpc_match<ValTy, SpecificInt_match, true> m_Not(const ValTy &V) {
+inline BinaryOpc_match<ValTy, AllOnes_match, true> m_Not(const ValTy &V) {
return m_Xor(V, m_AllOnes());
}
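
For context, a minimal usage sketch (not from the patch) of the corrected matcher, assuming the sd_match and m_Value helpers from the same header; isNotOfValue is a hypothetical name:

#include "llvm/CodeGen/SDPatternMatch.h"
using namespace llvm;
using namespace llvm::SDPatternMatch;

// Returns true if N is xor(x, -1) or xor(-1, x), binding x to X.
// With the fixed m_AllOnes, the -1 can be an integer constant of any
// width or an all-ones splat vector, not just a value that survives
// the old 64-bit APInt comparison.
static bool isNotOfValue(SDValue N, SDValue &X) {
  return sd_match(N, m_Not(m_Value(X)));
}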
diff --git a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
index ae66c5420638..f1fd05565c47 100644
--- a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
+++ b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
@@ -2384,52 +2384,45 @@ define void @vec384_v2f64(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
define void @vec384_v3i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
; SCALAR-LABEL: vec384_v3i8:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl (%rdi), %ecx
-; SCALAR-NEXT: movl %ecx, %eax
-; SCALAR-NEXT: shrl $16, %eax
-; SCALAR-NEXT: movl %ecx, %edi
-; SCALAR-NEXT: shrl $8, %edi
+; SCALAR-NEXT: movl (%rdi), %eax
+; SCALAR-NEXT: movl %eax, %ecx
+; SCALAR-NEXT: shrl $16, %ecx
; SCALAR-NEXT: notb %cl
-; SCALAR-NEXT: movzbl %cl, %r8d
-; SCALAR-NEXT: notb %dil
-; SCALAR-NEXT: movzbl %dil, %ecx
-; SCALAR-NEXT: shll $8, %ecx
-; SCALAR-NEXT: orl %r8d, %ecx
-; SCALAR-NEXT: notb %al
-; SCALAR-NEXT: movb %al, 2(%rsi)
-; SCALAR-NEXT: movw %cx, (%rsi)
-; SCALAR-NEXT: movb %al, 2(%rdx)
-; SCALAR-NEXT: movw %cx, (%rdx)
-; SCALAR-NEXT: movb %al, 6(%rdx)
-; SCALAR-NEXT: movw %cx, 4(%rdx)
-; SCALAR-NEXT: movb %al, 10(%rdx)
-; SCALAR-NEXT: movw %cx, 8(%rdx)
-; SCALAR-NEXT: movb %al, 14(%rdx)
-; SCALAR-NEXT: movw %cx, 12(%rdx)
-; SCALAR-NEXT: movb %al, 18(%rdx)
-; SCALAR-NEXT: movw %cx, 16(%rdx)
-; SCALAR-NEXT: movb %al, 22(%rdx)
-; SCALAR-NEXT: movw %cx, 20(%rdx)
-; SCALAR-NEXT: movb %al, 26(%rdx)
-; SCALAR-NEXT: movw %cx, 24(%rdx)
-; SCALAR-NEXT: movb %al, 30(%rdx)
-; SCALAR-NEXT: movw %cx, 28(%rdx)
-; SCALAR-NEXT: movb %al, 34(%rdx)
-; SCALAR-NEXT: movw %cx, 32(%rdx)
-; SCALAR-NEXT: movb %al, 38(%rdx)
-; SCALAR-NEXT: movw %cx, 36(%rdx)
-; SCALAR-NEXT: movb %al, 42(%rdx)
-; SCALAR-NEXT: movw %cx, 40(%rdx)
-; SCALAR-NEXT: movb %al, 46(%rdx)
-; SCALAR-NEXT: movw %cx, 44(%rdx)
-; SCALAR-NEXT: movb %al, 50(%rdx)
-; SCALAR-NEXT: movw %cx, 48(%rdx)
-; SCALAR-NEXT: movb %al, 54(%rdx)
-; SCALAR-NEXT: movw %cx, 52(%rdx)
-; SCALAR-NEXT: movb %al, 58(%rdx)
-; SCALAR-NEXT: movw %cx, 56(%rdx)
-; SCALAR-NEXT: movb %al, 62(%rdx)
-; SCALAR-NEXT: movw %cx, 60(%rdx)
+; SCALAR-NEXT: notl %eax
+; SCALAR-NEXT: movw %ax, (%rsi)
+; SCALAR-NEXT: movb %cl, 2(%rsi)
+; SCALAR-NEXT: movb %cl, 2(%rdx)
+; SCALAR-NEXT: movw %ax, (%rdx)
+; SCALAR-NEXT: movb %cl, 6(%rdx)
+; SCALAR-NEXT: movw %ax, 4(%rdx)
+; SCALAR-NEXT: movb %cl, 10(%rdx)
+; SCALAR-NEXT: movw %ax, 8(%rdx)
+; SCALAR-NEXT: movb %cl, 14(%rdx)
+; SCALAR-NEXT: movw %ax, 12(%rdx)
+; SCALAR-NEXT: movb %cl, 18(%rdx)
+; SCALAR-NEXT: movw %ax, 16(%rdx)
+; SCALAR-NEXT: movb %cl, 22(%rdx)
+; SCALAR-NEXT: movw %ax, 20(%rdx)
+; SCALAR-NEXT: movb %cl, 26(%rdx)
+; SCALAR-NEXT: movw %ax, 24(%rdx)
+; SCALAR-NEXT: movb %cl, 30(%rdx)
+; SCALAR-NEXT: movw %ax, 28(%rdx)
+; SCALAR-NEXT: movb %cl, 34(%rdx)
+; SCALAR-NEXT: movw %ax, 32(%rdx)
+; SCALAR-NEXT: movb %cl, 38(%rdx)
+; SCALAR-NEXT: movw %ax, 36(%rdx)
+; SCALAR-NEXT: movb %cl, 42(%rdx)
+; SCALAR-NEXT: movw %ax, 40(%rdx)
+; SCALAR-NEXT: movb %cl, 46(%rdx)
+; SCALAR-NEXT: movw %ax, 44(%rdx)
+; SCALAR-NEXT: movb %cl, 50(%rdx)
+; SCALAR-NEXT: movw %ax, 48(%rdx)
+; SCALAR-NEXT: movb %cl, 54(%rdx)
+; SCALAR-NEXT: movw %ax, 52(%rdx)
+; SCALAR-NEXT: movb %cl, 58(%rdx)
+; SCALAR-NEXT: movw %ax, 56(%rdx)
+; SCALAR-NEXT: movb %cl, 62(%rdx)
+; SCALAR-NEXT: movw %ax, 60(%rdx)
; SCALAR-NEXT: retq
;
; SSE2-ONLY-LABEL: vec384_v3i8:
@@ -3784,56 +3777,29 @@ define void @vec384_v4f32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
define void @vec384_v6i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
; SCALAR-LABEL: vec384_v6i8:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movq (%rdi), %rdi
-; SCALAR-NEXT: movq %rdi, %rax
-; SCALAR-NEXT: shrq $40, %rax
-; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: movq (%rdi), %rax
+; SCALAR-NEXT: movq %rax, %rcx
; SCALAR-NEXT: shrq $32, %rcx
-; SCALAR-NEXT: movl %edi, %r8d
-; SCALAR-NEXT: shrl $24, %r8d
-; SCALAR-NEXT: movl %edi, %r9d
-; SCALAR-NEXT: shrl $16, %r9d
-; SCALAR-NEXT: movl %edi, %r10d
-; SCALAR-NEXT: shrl $8, %r10d
-; SCALAR-NEXT: notb %dil
-; SCALAR-NEXT: movzbl %dil, %edi
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %edi, %r10d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %edi
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: shll $8, %r8d
-; SCALAR-NEXT: orl %edi, %r8d
-; SCALAR-NEXT: notb %cl
-; SCALAR-NEXT: movzbl %cl, %ecx
-; SCALAR-NEXT: notb %al
-; SCALAR-NEXT: movzbl %al, %eax
-; SCALAR-NEXT: shll $8, %eax
-; SCALAR-NEXT: orl %ecx, %eax
-; SCALAR-NEXT: movw %ax, 4(%rsi)
-; SCALAR-NEXT: shll $16, %r8d
-; SCALAR-NEXT: movzwl %r10w, %ecx
-; SCALAR-NEXT: orl %r8d, %ecx
-; SCALAR-NEXT: movl %ecx, (%rsi)
-; SCALAR-NEXT: movw %ax, 4(%rdx)
-; SCALAR-NEXT: movl %ecx, (%rdx)
-; SCALAR-NEXT: movw %ax, 12(%rdx)
-; SCALAR-NEXT: movl %ecx, 8(%rdx)
-; SCALAR-NEXT: movw %ax, 20(%rdx)
-; SCALAR-NEXT: movl %ecx, 16(%rdx)
-; SCALAR-NEXT: movw %ax, 28(%rdx)
-; SCALAR-NEXT: movl %ecx, 24(%rdx)
-; SCALAR-NEXT: movw %ax, 36(%rdx)
-; SCALAR-NEXT: movl %ecx, 32(%rdx)
-; SCALAR-NEXT: movw %ax, 44(%rdx)
-; SCALAR-NEXT: movl %ecx, 40(%rdx)
-; SCALAR-NEXT: movw %ax, 52(%rdx)
-; SCALAR-NEXT: movl %ecx, 48(%rdx)
-; SCALAR-NEXT: movw %ax, 60(%rdx)
-; SCALAR-NEXT: movl %ecx, 56(%rdx)
+; SCALAR-NEXT: notl %ecx
+; SCALAR-NEXT: notl %eax
+; SCALAR-NEXT: movl %eax, (%rsi)
+; SCALAR-NEXT: movw %cx, 4(%rsi)
+; SCALAR-NEXT: movw %cx, 4(%rdx)
+; SCALAR-NEXT: movl %eax, (%rdx)
+; SCALAR-NEXT: movw %cx, 12(%rdx)
+; SCALAR-NEXT: movl %eax, 8(%rdx)
+; SCALAR-NEXT: movw %cx, 20(%rdx)
+; SCALAR-NEXT: movl %eax, 16(%rdx)
+; SCALAR-NEXT: movw %cx, 28(%rdx)
+; SCALAR-NEXT: movl %eax, 24(%rdx)
+; SCALAR-NEXT: movw %cx, 36(%rdx)
+; SCALAR-NEXT: movl %eax, 32(%rdx)
+; SCALAR-NEXT: movw %cx, 44(%rdx)
+; SCALAR-NEXT: movl %eax, 40(%rdx)
+; SCALAR-NEXT: movw %cx, 52(%rdx)
+; SCALAR-NEXT: movl %eax, 48(%rdx)
+; SCALAR-NEXT: movw %cx, 60(%rdx)
+; SCALAR-NEXT: movl %eax, 56(%rdx)
; SCALAR-NEXT: retq
;
; SSE2-ONLY-LABEL: vec384_v6i8:
@@ -4062,31 +4028,20 @@ define void @vec384_v6i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.p
define void @vec384_v6i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
; SCALAR-LABEL: vec384_v6i16:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl 8(%rdi), %eax
-; SCALAR-NEXT: movq (%rdi), %rcx
-; SCALAR-NEXT: movq %rcx, %rdi
-; SCALAR-NEXT: shrq $32, %rdi
-; SCALAR-NEXT: movq %rcx, %r8
-; SCALAR-NEXT: shrq $48, %r8
-; SCALAR-NEXT: notl %r8d
-; SCALAR-NEXT: shll $16, %r8d
-; SCALAR-NEXT: notl %edi
-; SCALAR-NEXT: movzwl %di, %edi
-; SCALAR-NEXT: orl %r8d, %edi
+; SCALAR-NEXT: movq (%rdi), %rax
+; SCALAR-NEXT: movl 8(%rdi), %ecx
; SCALAR-NEXT: notl %ecx
-; SCALAR-NEXT: notl %eax
-; SCALAR-NEXT: movl %eax, 8(%rsi)
-; SCALAR-NEXT: shlq $32, %rdi
-; SCALAR-NEXT: orq %rdi, %rcx
-; SCALAR-NEXT: movq %rcx, (%rsi)
-; SCALAR-NEXT: movl %eax, 8(%rdx)
-; SCALAR-NEXT: movq %rcx, (%rdx)
-; SCALAR-NEXT: movl %eax, 24(%rdx)
-; SCALAR-NEXT: movq %rcx, 16(%rdx)
-; SCALAR-NEXT: movl %eax, 40(%rdx)
-; SCALAR-NEXT: movq %rcx, 32(%rdx)
-; SCALAR-NEXT: movl %eax, 56(%rdx)
-; SCALAR-NEXT: movq %rcx, 48(%rdx)
+; SCALAR-NEXT: notq %rax
+; SCALAR-NEXT: movq %rax, (%rsi)
+; SCALAR-NEXT: movl %ecx, 8(%rsi)
+; SCALAR-NEXT: movl %ecx, 8(%rdx)
+; SCALAR-NEXT: movq %rax, (%rdx)
+; SCALAR-NEXT: movl %ecx, 24(%rdx)
+; SCALAR-NEXT: movq %rax, 16(%rdx)
+; SCALAR-NEXT: movl %ecx, 40(%rdx)
+; SCALAR-NEXT: movq %rax, 32(%rdx)
+; SCALAR-NEXT: movl %ecx, 56(%rdx)
+; SCALAR-NEXT: movq %rax, 48(%rdx)
; SCALAR-NEXT: retq
;
; SSE2-ONLY-LABEL: vec384_v6i16:
@@ -4579,95 +4534,20 @@ define void @vec384_v8i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
define void @vec384_v12i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
; SCALAR-LABEL: vec384_v12i8:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: pushq %rbp
-; SCALAR-NEXT: pushq %r15
-; SCALAR-NEXT: pushq %r14
-; SCALAR-NEXT: pushq %r12
-; SCALAR-NEXT: pushq %rbx
-; SCALAR-NEXT: movq (%rdi), %r9
-; SCALAR-NEXT: movq 8(%rdi), %rcx
-; SCALAR-NEXT: movl %ecx, %eax
-; SCALAR-NEXT: shrl $8, %eax
-; SCALAR-NEXT: movl %ecx, %edi
-; SCALAR-NEXT: shrl $24, %edi
-; SCALAR-NEXT: movl %ecx, %r8d
-; SCALAR-NEXT: shrl $16, %r8d
-; SCALAR-NEXT: movq %r9, %r10
-; SCALAR-NEXT: shrq $40, %r10
-; SCALAR-NEXT: movq %r9, %r11
-; SCALAR-NEXT: shrq $32, %r11
-; SCALAR-NEXT: movq %r9, %rbx
-; SCALAR-NEXT: shrq $56, %rbx
-; SCALAR-NEXT: movq %r9, %r14
-; SCALAR-NEXT: shrq $48, %r14
-; SCALAR-NEXT: movl %r9d, %ebp
-; SCALAR-NEXT: shrl $8, %ebp
-; SCALAR-NEXT: movl %r9d, %r15d
-; SCALAR-NEXT: shrl $24, %r15d
-; SCALAR-NEXT: movl %r9d, %r12d
-; SCALAR-NEXT: shrl $16, %r12d
-; SCALAR-NEXT: notb %r12b
-; SCALAR-NEXT: movzbl %r12b, %r12d
-; SCALAR-NEXT: notb %r15b
-; SCALAR-NEXT: movzbl %r15b, %r15d
-; SCALAR-NEXT: shll $8, %r15d
-; SCALAR-NEXT: orl %r12d, %r15d
-; SCALAR-NEXT: shll $16, %r15d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: notb %bpl
-; SCALAR-NEXT: movzbl %bpl, %ebp
-; SCALAR-NEXT: shll $8, %ebp
-; SCALAR-NEXT: orl %r9d, %ebp
-; SCALAR-NEXT: movzwl %bp, %r9d
-; SCALAR-NEXT: orl %r15d, %r9d
-; SCALAR-NEXT: notb %r14b
-; SCALAR-NEXT: movzbl %r14b, %ebp
-; SCALAR-NEXT: notb %bl
-; SCALAR-NEXT: movzbl %bl, %ebx
-; SCALAR-NEXT: shll $8, %ebx
-; SCALAR-NEXT: orl %ebp, %ebx
-; SCALAR-NEXT: shll $16, %ebx
-; SCALAR-NEXT: notb %r11b
-; SCALAR-NEXT: movzbl %r11b, %r11d
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %r11d, %r10d
-; SCALAR-NEXT: movzwl %r10w, %r10d
-; SCALAR-NEXT: orl %ebx, %r10d
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: notb %dil
-; SCALAR-NEXT: movzbl %dil, %edi
-; SCALAR-NEXT: shll $8, %edi
-; SCALAR-NEXT: orl %r8d, %edi
-; SCALAR-NEXT: shll $16, %edi
-; SCALAR-NEXT: notb %cl
-; SCALAR-NEXT: movzbl %cl, %ecx
-; SCALAR-NEXT: notb %al
-; SCALAR-NEXT: movzbl %al, %eax
-; SCALAR-NEXT: shll $8, %eax
-; SCALAR-NEXT: orl %ecx, %eax
-; SCALAR-NEXT: movzwl %ax, %eax
-; SCALAR-NEXT: orl %edi, %eax
-; SCALAR-NEXT: movl %eax, 8(%rsi)
-; SCALAR-NEXT: shlq $32, %r10
-; SCALAR-NEXT: orq %r10, %r9
-; SCALAR-NEXT: movq %r9, (%rsi)
-; SCALAR-NEXT: movl %eax, 8(%rdx)
-; SCALAR-NEXT: movq %r9, (%rdx)
-; SCALAR-NEXT: movl %eax, 24(%rdx)
-; SCALAR-NEXT: movq %r9, 16(%rdx)
-; SCALAR-NEXT: movl %eax, 40(%rdx)
-; SCALAR-NEXT: movq %r9, 32(%rdx)
-; SCALAR-NEXT: movl %eax, 56(%rdx)
-; SCALAR-NEXT: movq %r9, 48(%rdx)
-; SCALAR-NEXT: popq %rbx
-; SCALAR-NEXT: popq %r12
-; SCALAR-NEXT: popq %r14
-; SCALAR-NEXT: popq %r15
-; SCALAR-NEXT: popq %rbp
+; SCALAR-NEXT: movq (%rdi), %rax
+; SCALAR-NEXT: movl 8(%rdi), %ecx
+; SCALAR-NEXT: notl %ecx
+; SCALAR-NEXT: notq %rax
+; SCALAR-NEXT: movq %rax, (%rsi)
+; SCALAR-NEXT: movl %ecx, 8(%rsi)
+; SCALAR-NEXT: movl %ecx, 8(%rdx)
+; SCALAR-NEXT: movq %rax, (%rdx)
+; SCALAR-NEXT: movl %ecx, 24(%rdx)
+; SCALAR-NEXT: movq %rax, 16(%rdx)
+; SCALAR-NEXT: movl %ecx, 40(%rdx)
+; SCALAR-NEXT: movq %rax, 32(%rdx)
+; SCALAR-NEXT: movl %ecx, 56(%rdx)
+; SCALAR-NEXT: movq %rax, 48(%rdx)
; SCALAR-NEXT: retq
;
; SSE2-ONLY-LABEL: vec384_v12i8:
@@ -4785,47 +4665,12 @@ define void @vec384_v12i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
define void @vec384_v12i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
; SCALAR-LABEL: vec384_v12i16:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: pushq %r14
-; SCALAR-NEXT: pushq %rbx
; SCALAR-NEXT: movq (%rdi), %rax
; SCALAR-NEXT: movq 8(%rdi), %rcx
-; SCALAR-NEXT: movq %rax, %r8
-; SCALAR-NEXT: shrq $32, %r8
-; SCALAR-NEXT: movq %rax, %r9
-; SCALAR-NEXT: shrq $48, %r9
-; SCALAR-NEXT: movq %rcx, %r10
-; SCALAR-NEXT: shrq $32, %r10
-; SCALAR-NEXT: movq %rcx, %r11
-; SCALAR-NEXT: shrq $48, %r11
; SCALAR-NEXT: movq 16(%rdi), %rdi
-; SCALAR-NEXT: movq %rdi, %rbx
-; SCALAR-NEXT: shrq $32, %rbx
-; SCALAR-NEXT: movq %rdi, %r14
-; SCALAR-NEXT: shrq $48, %r14
-; SCALAR-NEXT: notl %r14d
-; SCALAR-NEXT: shll $16, %r14d
-; SCALAR-NEXT: notl %ebx
-; SCALAR-NEXT: movzwl %bx, %ebx
-; SCALAR-NEXT: orl %r14d, %ebx
-; SCALAR-NEXT: shlq $32, %rbx
-; SCALAR-NEXT: notl %edi
-; SCALAR-NEXT: orq %rbx, %rdi
-; SCALAR-NEXT: notl %r11d
-; SCALAR-NEXT: shll $16, %r11d
-; SCALAR-NEXT: notl %r10d
-; SCALAR-NEXT: movzwl %r10w, %r10d
-; SCALAR-NEXT: orl %r11d, %r10d
-; SCALAR-NEXT: shlq $32, %r10
-; SCALAR-NEXT: notl %ecx
-; SCALAR-NEXT: orq %r10, %rcx
-; SCALAR-NEXT: notl %r9d
-; SCALAR-NEXT: shll $16, %r9d
-; SCALAR-NEXT: notl %r8d
-; SCALAR-NEXT: movzwl %r8w, %r8d
-; SCALAR-NEXT: orl %r9d, %r8d
-; SCALAR-NEXT: shlq $32, %r8
-; SCALAR-NEXT: notl %eax
-; SCALAR-NEXT: orq %r8, %rax
+; SCALAR-NEXT: notq %rdi
+; SCALAR-NEXT: notq %rcx
+; SCALAR-NEXT: notq %rax
; SCALAR-NEXT: movq %rax, (%rsi)
; SCALAR-NEXT: movq %rcx, 8(%rsi)
; SCALAR-NEXT: movq %rdi, 16(%rsi)
@@ -4835,8 +4680,6 @@ define void @vec384_v12i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec
; SCALAR-NEXT: movq %rdi, 48(%rdx)
; SCALAR-NEXT: movq %rcx, 40(%rdx)
; SCALAR-NEXT: movq %rax, 32(%rdx)
-; SCALAR-NEXT: popq %rbx
-; SCALAR-NEXT: popq %r14
; SCALAR-NEXT: retq
;
; SSE2-LABEL: vec384_v12i16:
@@ -5085,144 +4928,9 @@ define void @vec384_v24i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
; SCALAR-NEXT: movq (%rdi), %rax
; SCALAR-NEXT: movq 8(%rdi), %rcx
; SCALAR-NEXT: movq 16(%rdi), %rdi
-; SCALAR-NEXT: movq %rdi, %r8
-; SCALAR-NEXT: shrq $40, %r8
-; SCALAR-NEXT: movq %rdi, %r9
-; SCALAR-NEXT: shrq $56, %r9
-; SCALAR-NEXT: movq %rdi, %r10
-; SCALAR-NEXT: shrq $48, %r10
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: shll $8, %r9d
-; SCALAR-NEXT: orl %r10d, %r9d
-; SCALAR-NEXT: movq %rdi, %r10
-; SCALAR-NEXT: shrq $32, %r10
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: shll $8, %r8d
-; SCALAR-NEXT: orl %r10d, %r8d
-; SCALAR-NEXT: movl %edi, %r10d
-; SCALAR-NEXT: shrl $24, %r10d
-; SCALAR-NEXT: shll $16, %r9d
-; SCALAR-NEXT: movzwl %r8w, %r8d
-; SCALAR-NEXT: orl %r9d, %r8d
-; SCALAR-NEXT: movl %edi, %r9d
-; SCALAR-NEXT: shrl $16, %r9d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %r9d, %r10d
-; SCALAR-NEXT: movl %edi, %r9d
-; SCALAR-NEXT: shrl $8, %r9d
-; SCALAR-NEXT: notb %dil
-; SCALAR-NEXT: movzbl %dil, %edi
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r11d
-; SCALAR-NEXT: shll $8, %r11d
-; SCALAR-NEXT: orl %edi, %r11d
-; SCALAR-NEXT: movq %rcx, %r9
-; SCALAR-NEXT: shrq $40, %r9
-; SCALAR-NEXT: shll $16, %r10d
-; SCALAR-NEXT: movzwl %r11w, %edi
-; SCALAR-NEXT: orl %r10d, %edi
-; SCALAR-NEXT: movq %rcx, %r10
-; SCALAR-NEXT: shrq $56, %r10
-; SCALAR-NEXT: shlq $32, %r8
-; SCALAR-NEXT: orq %r8, %rdi
-; SCALAR-NEXT: movq %rcx, %r8
-; SCALAR-NEXT: shrq $48, %r8
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %r8d, %r10d
-; SCALAR-NEXT: movq %rcx, %r8
-; SCALAR-NEXT: shrq $32, %r8
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: shll $8, %r9d
-; SCALAR-NEXT: orl %r8d, %r9d
-; SCALAR-NEXT: movl %ecx, %r11d
-; SCALAR-NEXT: shrl $24, %r11d
-; SCALAR-NEXT: shll $16, %r10d
-; SCALAR-NEXT: movzwl %r9w, %r8d
-; SCALAR-NEXT: orl %r10d, %r8d
-; SCALAR-NEXT: movl %ecx, %r9d
-; SCALAR-NEXT: shrl $16, %r9d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: notb %r11b
-; SCALAR-NEXT: movzbl %r11b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %r9d, %r10d
-; SCALAR-NEXT: movl %ecx, %r9d
-; SCALAR-NEXT: shrl $8, %r9d
-; SCALAR-NEXT: notb %cl
-; SCALAR-NEXT: movzbl %cl, %ecx
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r11d
-; SCALAR-NEXT: shll $8, %r11d
-; SCALAR-NEXT: orl %ecx, %r11d
-; SCALAR-NEXT: movq %rax, %r9
-; SCALAR-NEXT: shrq $40, %r9
-; SCALAR-NEXT: shll $16, %r10d
-; SCALAR-NEXT: movzwl %r11w, %ecx
-; SCALAR-NEXT: orl %r10d, %ecx
-; SCALAR-NEXT: movq %rax, %r10
-; SCALAR-NEXT: shrq $56, %r10
-; SCALAR-NEXT: shlq $32, %r8
-; SCALAR-NEXT: orq %r8, %rcx
-; SCALAR-NEXT: movq %rax, %r8
-; SCALAR-NEXT: shrq $48, %r8
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %r8d, %r10d
-; SCALAR-NEXT: movq %rax, %r8
-; SCALAR-NEXT: shrq $32, %r8
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: shll $8, %r9d
-; SCALAR-NEXT: orl %r8d, %r9d
-; SCALAR-NEXT: movl %eax, %r11d
-; SCALAR-NEXT: shrl $24, %r11d
-; SCALAR-NEXT: shll $16, %r10d
-; SCALAR-NEXT: movzwl %r9w, %r8d
-; SCALAR-NEXT: orl %r10d, %r8d
-; SCALAR-NEXT: movl %eax, %r9d
-; SCALAR-NEXT: shrl $16, %r9d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: notb %r11b
-; SCALAR-NEXT: movzbl %r11b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %r9d, %r10d
-; SCALAR-NEXT: movl %eax, %r9d
-; SCALAR-NEXT: shrl $8, %r9d
-; SCALAR-NEXT: notb %al
-; SCALAR-NEXT: movzbl %al, %eax
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: shll $8, %r9d
-; SCALAR-NEXT: orl %eax, %r9d
-; SCALAR-NEXT: shll $16, %r10d
-; SCALAR-NEXT: movzwl %r9w, %eax
-; SCALAR-NEXT: orl %r10d, %eax
-; SCALAR-NEXT: shlq $32, %r8
-; SCALAR-NEXT: orq %r8, %rax
+; SCALAR-NEXT: notq %rdi
+; SCALAR-NEXT: notq %rcx
+; SCALAR-NEXT: notq %rax
; SCALAR-NEXT: movq %rax, (%rsi)
; SCALAR-NEXT: movq %rcx, 8(%rsi)
; SCALAR-NEXT: movq %rdi, 16(%rsi)