summary refs log tree commit diff stats
path: root/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/recon_sse2.asm
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/third_party/libvpx/source/libvpx/vp8/common/x86/recon_sse2.asm')
-rw-r--r-- chromium/third_party/libvpx/source/libvpx/vp8/common/x86/recon_sse2.asm 79
1 file changed, 56 insertions, 23 deletions
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/recon_sse2.asm b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/recon_sse2.asm
index 1434bcd9379..7141f832463 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/recon_sse2.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/recon_sse2.asm
@@ -365,6 +365,7 @@ sym(vp8_intra_pred_uv_tm_%1):
GET_GOT rbx
push rsi
push rdi
+ push rbx
; end prolog
; read top row
@@ -395,8 +396,11 @@ sym(vp8_intra_pred_uv_tm_%1):
movsxd rcx, dword ptr arg(1) ;dst_stride
.vp8_intra_pred_uv_tm_%1_loop:
- movd xmm3, [rsi]
- movd xmm5, [rsi+rax]
+ mov bl, [rsi]
+ movd xmm3, ebx
+
+ mov bl, [rsi+rax]
+ movd xmm5, ebx
%ifidn %1, sse2
punpcklbw xmm3, xmm0
punpcklbw xmm5, xmm0
@@ -419,6 +423,7 @@ sym(vp8_intra_pred_uv_tm_%1):
jnz .vp8_intra_pred_uv_tm_%1_loop
; begin epilog
+ pop rbx
pop rdi
pop rsi
RESTORE_GOT
@@ -486,10 +491,8 @@ sym(vp8_intra_pred_uv_ho_%1):
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
-%ifidn %1, ssse3
-%ifndef GET_GOT_SAVE_ARG
push rbx
-%endif
+%ifidn %1, ssse3
GET_GOT rbx
%endif
; end prolog
@@ -507,13 +510,16 @@ sym(vp8_intra_pred_uv_ho_%1):
%ifidn %1, ssse3
lea rdx, [rcx*3]
movdqa xmm2, [GLOBAL(dc_00001111)]
- lea rbx, [rax*3]
%endif
%ifidn %1, mmx2
.vp8_intra_pred_uv_ho_%1_loop:
- movd mm0, [rsi]
- movd mm1, [rsi+rax]
+ mov bl, [rsi]
+ movd mm0, ebx
+
+ mov bl, [rsi+rax]
+ movd mm1, ebx
+
punpcklbw mm0, mm0
punpcklbw mm1, mm1
pshufw mm0, mm0, 0x0
@@ -525,10 +531,19 @@ sym(vp8_intra_pred_uv_ho_%1):
dec edx
jnz .vp8_intra_pred_uv_ho_%1_loop
%else
- movd xmm0, [rsi]
- movd xmm3, [rsi+rax]
- movd xmm1, [rsi+rax*2]
- movd xmm4, [rsi+rbx]
+ mov bl, [rsi]
+ movd xmm0, ebx
+
+ mov bl, [rsi+rax]
+ movd xmm3, ebx
+
+ mov bl, [rsi+rax*2]
+ movd xmm1, ebx
+
+ lea rbx, [rax*3]
+ mov bl, [rsi+rbx]
+ movd xmm4, ebx
+
punpcklbw xmm0, xmm3
punpcklbw xmm1, xmm4
pshufb xmm0, xmm2
@@ -539,10 +554,20 @@ sym(vp8_intra_pred_uv_ho_%1):
movhps [rdi+rdx], xmm1
lea rsi, [rsi+rax*4]
lea rdi, [rdi+rcx*4]
- movd xmm0, [rsi]
- movd xmm3, [rsi+rax]
- movd xmm1, [rsi+rax*2]
- movd xmm4, [rsi+rbx]
+
+ mov bl, [rsi]
+ movd xmm0, ebx
+
+ mov bl, [rsi+rax]
+ movd xmm3, ebx
+
+ mov bl, [rsi+rax*2]
+ movd xmm1, ebx
+
+ lea rbx, [rax*3]
+ mov bl, [rsi+rbx]
+ movd xmm4, ebx
+
punpcklbw xmm0, xmm3
punpcklbw xmm1, xmm4
pshufb xmm0, xmm2
@@ -556,10 +581,8 @@ sym(vp8_intra_pred_uv_ho_%1):
; begin epilog
%ifidn %1, ssse3
RESTORE_GOT
-%ifndef GET_GOT_SAVE_ARG
- pop rbx
-%endif
%endif
+ pop rbx
pop rdi
pop rsi
UNSHADOW_ARGS
@@ -893,6 +916,7 @@ sym(vp8_intra_pred_y_tm_%1):
SAVE_XMM 7
push rsi
push rdi
+ push rbx
GET_GOT rbx
; end prolog
@@ -926,8 +950,11 @@ sym(vp8_intra_pred_y_tm_%1):
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
vp8_intra_pred_y_tm_%1_loop:
- movd xmm4, [rsi]
- movd xmm5, [rsi+rax]
+ mov bl, [rsi]
+ movd xmm4, ebx
+
+ mov bl, [rsi+rax]
+ movd xmm5, ebx
%ifidn %1, sse2
punpcklbw xmm4, xmm0
punpcklbw xmm5, xmm0
@@ -956,6 +983,7 @@ vp8_intra_pred_y_tm_%1_loop:
; begin epilog
RESTORE_GOT
+ pop rbx
pop rdi
pop rsi
RESTORE_XMM
@@ -1029,6 +1057,7 @@ sym(vp8_intra_pred_y_ho_sse2):
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
+ push rbx
; end prolog
;arg(2) not used
@@ -1041,8 +1070,11 @@ sym(vp8_intra_pred_y_ho_sse2):
movsxd rcx, dword ptr arg(1) ;dst_stride
vp8_intra_pred_y_ho_sse2_loop:
- movd xmm0, [rsi]
- movd xmm1, [rsi+rax]
+ mov bl, [rsi]
+ movd xmm0, ebx
+ mov bl, [rsi+rax]
+ movd xmm1, ebx
+
; FIXME use pshufb for ssse3 version
punpcklbw xmm0, xmm0
punpcklbw xmm1, xmm1
@@ -1058,6 +1090,7 @@ vp8_intra_pred_y_ho_sse2_loop:
jnz vp8_intra_pred_y_ho_sse2_loop
; begin epilog
+ pop rbx
pop rdi
pop rsi
UNSHADOW_ARGS