Diffstat (limited to 'llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll')
 -rw-r--r-- llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll | 120
 1 file changed, 56 insertions(+), 64 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll
index 38623c6ce0cb..ded7c002c873 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll
@@ -94,105 +94,97 @@ define void @store_i64_stride4_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
;
; AVX512-LABEL: store_i64_stride4_vf2:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovaps (%rdi), %xmm0
-; AVX512-NEXT: vmovaps (%rdx), %xmm1
-; AVX512-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512-NEXT: vpermpd %zmm0, %zmm1, %zmm0
-; AVX512-NEXT: vmovaps %zmm0, (%r8)
+; AVX512-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512-NEXT: vmovdqa (%rdx), %xmm1
+; AVX512-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vmovdqa64 %zmm2, (%r8)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512-FCP-LABEL: store_i64_stride4_vf2:
; AVX512-FCP: # %bb.0:
-; AVX512-FCP-NEXT: vmovaps (%rdi), %xmm0
-; AVX512-FCP-NEXT: vmovaps (%rdx), %xmm1
-; AVX512-FCP-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512-FCP-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512-FCP-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512-FCP-NEXT: vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512-FCP-NEXT: vpermpd %zmm0, %zmm1, %zmm0
-; AVX512-FCP-NEXT: vmovaps %zmm0, (%r8)
+; AVX512-FCP-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512-FCP-NEXT: vmovdqa (%rdx), %xmm1
+; AVX512-FCP-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512-FCP-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512-FCP-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512-FCP-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512-FCP-NEXT: vmovdqa64 %zmm2, (%r8)
; AVX512-FCP-NEXT: vzeroupper
; AVX512-FCP-NEXT: retq
;
; AVX512DQ-LABEL: store_i64_stride4_vf2:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
-; AVX512DQ-NEXT: vmovaps (%rdx), %xmm1
-; AVX512DQ-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512DQ-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512DQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512DQ-NEXT: vpermpd %zmm0, %zmm1, %zmm0
-; AVX512DQ-NEXT: vmovaps %zmm0, (%r8)
+; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512DQ-NEXT: vmovdqa (%rdx), %xmm1
+; AVX512DQ-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512DQ-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512DQ-NEXT: vmovdqa64 %zmm2, (%r8)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512DQ-FCP-LABEL: store_i64_stride4_vf2:
; AVX512DQ-FCP: # %bb.0:
-; AVX512DQ-FCP-NEXT: vmovaps (%rdi), %xmm0
-; AVX512DQ-FCP-NEXT: vmovaps (%rdx), %xmm1
-; AVX512DQ-FCP-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512DQ-FCP-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512DQ-FCP-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-FCP-NEXT: vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512DQ-FCP-NEXT: vpermpd %zmm0, %zmm1, %zmm0
-; AVX512DQ-FCP-NEXT: vmovaps %zmm0, (%r8)
+; AVX512DQ-FCP-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512DQ-FCP-NEXT: vmovdqa (%rdx), %xmm1
+; AVX512DQ-FCP-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512DQ-FCP-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512DQ-FCP-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512DQ-FCP-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512DQ-FCP-NEXT: vmovdqa64 %zmm2, (%r8)
; AVX512DQ-FCP-NEXT: vzeroupper
; AVX512DQ-FCP-NEXT: retq
;
; AVX512BW-LABEL: store_i64_stride4_vf2:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovaps (%rdi), %xmm0
-; AVX512BW-NEXT: vmovaps (%rdx), %xmm1
-; AVX512BW-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512BW-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512BW-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512BW-NEXT: vpermpd %zmm0, %zmm1, %zmm0
-; AVX512BW-NEXT: vmovaps %zmm0, (%r8)
+; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512BW-NEXT: vmovdqa (%rdx), %xmm1
+; AVX512BW-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512BW-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vmovdqa64 %zmm2, (%r8)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BW-FCP-LABEL: store_i64_stride4_vf2:
; AVX512BW-FCP: # %bb.0:
-; AVX512BW-FCP-NEXT: vmovaps (%rdi), %xmm0
-; AVX512BW-FCP-NEXT: vmovaps (%rdx), %xmm1
-; AVX512BW-FCP-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512BW-FCP-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512BW-FCP-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-FCP-NEXT: vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512BW-FCP-NEXT: vpermpd %zmm0, %zmm1, %zmm0
-; AVX512BW-FCP-NEXT: vmovaps %zmm0, (%r8)
+; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512BW-FCP-NEXT: vmovdqa (%rdx), %xmm1
+; AVX512BW-FCP-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512BW-FCP-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512BW-FCP-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512BW-FCP-NEXT: vmovdqa64 %zmm2, (%r8)
; AVX512BW-FCP-NEXT: vzeroupper
; AVX512BW-FCP-NEXT: retq
;
; AVX512DQ-BW-LABEL: store_i64_stride4_vf2:
; AVX512DQ-BW: # %bb.0:
-; AVX512DQ-BW-NEXT: vmovaps (%rdi), %xmm0
-; AVX512DQ-BW-NEXT: vmovaps (%rdx), %xmm1
-; AVX512DQ-BW-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512DQ-BW-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512DQ-BW-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-BW-NEXT: vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512DQ-BW-NEXT: vpermpd %zmm0, %zmm1, %zmm0
-; AVX512DQ-BW-NEXT: vmovaps %zmm0, (%r8)
+; AVX512DQ-BW-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512DQ-BW-NEXT: vmovdqa (%rdx), %xmm1
+; AVX512DQ-BW-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512DQ-BW-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512DQ-BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512DQ-BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512DQ-BW-NEXT: vmovdqa64 %zmm2, (%r8)
; AVX512DQ-BW-NEXT: vzeroupper
; AVX512DQ-BW-NEXT: retq
;
; AVX512DQ-BW-FCP-LABEL: store_i64_stride4_vf2:
; AVX512DQ-BW-FCP: # %bb.0:
-; AVX512DQ-BW-FCP-NEXT: vmovaps (%rdi), %xmm0
-; AVX512DQ-BW-FCP-NEXT: vmovaps (%rdx), %xmm1
-; AVX512DQ-BW-FCP-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
-; AVX512DQ-BW-FCP-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
-; AVX512DQ-BW-FCP-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-BW-FCP-NEXT: vmovaps {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
-; AVX512DQ-BW-FCP-NEXT: vpermpd %zmm0, %zmm1, %zmm0
-; AVX512DQ-BW-FCP-NEXT: vmovaps %zmm0, (%r8)
+; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdx), %xmm1
+; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512DQ-BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,2,8,10,1,3,9,11]
+; AVX512DQ-BW-FCP-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm2, (%r8)
; AVX512DQ-BW-FCP-NEXT: vzeroupper
; AVX512DQ-BW-FCP-NEXT: retq
%in.vec0 = load <2 x i64>, ptr %in.vecptr0, align 64