summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/libwebp/src/dsp/enc_mips32.c
diff options
context:
space:
mode:
author Liang Qi <liang.qi@theqtcompany.com> 2016-03-09 10:22:13 +0100
committer Liang Qi <liang.qi@theqtcompany.com> 2016-03-11 20:05:19 +0000
commit b114e552211456fbde3ff6ca2da21cbc8d1e90e2 (patch)
tree 9c033ea7bcc9cc7314eaa8aff57356b2ae301257 /src/3rdparty/libwebp/src/dsp/enc_mips32.c
parent 1d4f24820c0fff474d524e006d715e13e409a4b8 (diff)
libwebp: update to 0.5.0
This commit imports libwebp 0.5.0, including the AUTHORS, COPYING, ChangeLog, NEWS, PATENTS and README files and the src directory. Within src, only header and source files are included. The patches required to build it in Qt will follow in separate commit(s). Change-Id: I96b4961ba63c75cc7fbab158c36a0f403f254c14 Reviewed-by: aavit <eirik.aavitsland@theqtcompany.com>
Diffstat (limited to 'src/3rdparty/libwebp/src/dsp/enc_mips32.c')
-rw-r--r--src/3rdparty/libwebp/src/dsp/enc_mips32.c482
1 files changed, 189 insertions, 293 deletions
diff --git a/src/3rdparty/libwebp/src/dsp/enc_mips32.c b/src/3rdparty/libwebp/src/dsp/enc_mips32.c
index 6cede18..fd10143 100644
--- a/src/3rdparty/libwebp/src/dsp/enc_mips32.c
+++ b/src/3rdparty/libwebp/src/dsp/enc_mips32.c
@@ -17,13 +17,10 @@
#if defined(WEBP_USE_MIPS32)
+#include "./mips_macro.h"
#include "../enc/vp8enci.h"
#include "../enc/cost.h"
-#if defined(__GNUC__) && defined(__ANDROID__) && LOCAL_GCC_VERSION == 0x409
-#define WORK_AROUND_GCC
-#endif
-
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
@@ -59,61 +56,61 @@ static const int kC2 = 35468;
// MUL and STORE macros inlined
// a = clip_8b(a) is replaced with: a = max(a, 0); a = min(a, 255)
// temp0..temp15 holds tmp[0]..tmp[15]
-// A..D - offsets in bytes to load from ref and store to dst buffer
+// A - row index: ref/dst bytes are accessed at offsets BPS * A + 0..3
// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
-#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
- "addiu %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
- "addu %[temp16], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
- "subu %[temp17], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
- "mul %[" #TEMP0 "], %[" #TEMP4 "], %[kC2] \n\t" \
- "mul %[" #TEMP8 "], %[" #TEMP12 "], %[kC1] \n\t" \
- "mul %[" #TEMP4 "], %[" #TEMP4 "], %[kC1] \n\t" \
- "mul %[" #TEMP12 "], %[" #TEMP12 "], %[kC2] \n\t" \
- "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \
- "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
- "sra %[" #TEMP4 "], %[" #TEMP4 "], 16 \n\t" \
- "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
- "subu %[temp18], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
- "addu %[temp19], %[" #TEMP4 "], %[" #TEMP12 "] \n\t" \
- "addu %[" #TEMP0 "], %[temp16], %[temp19] \n\t" \
- "addu %[" #TEMP4 "], %[temp17], %[temp18] \n\t" \
- "subu %[" #TEMP8 "], %[temp17], %[temp18] \n\t" \
- "subu %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
- "lw %[temp20], 0(%[args]) \n\t" \
- "sra %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \
- "sra %[" #TEMP4 "], %[" #TEMP4 "], 3 \n\t" \
- "sra %[" #TEMP8 "], %[" #TEMP8 "], 3 \n\t" \
- "sra %[" #TEMP12 "], %[" #TEMP12 "], 3 \n\t" \
- "lbu %[temp16], " #A "(%[temp20]) \n\t" \
- "lbu %[temp17], " #B "(%[temp20]) \n\t" \
- "lbu %[temp18], " #C "(%[temp20]) \n\t" \
- "lbu %[temp19], " #D "(%[temp20]) \n\t" \
- "addu %[" #TEMP0 "], %[temp16], %[" #TEMP0 "] \n\t" \
- "addu %[" #TEMP4 "], %[temp17], %[" #TEMP4 "] \n\t" \
- "addu %[" #TEMP8 "], %[temp18], %[" #TEMP8 "] \n\t" \
- "addu %[" #TEMP12 "], %[temp19], %[" #TEMP12 "] \n\t" \
- "slt %[temp16], %[" #TEMP0 "], $zero \n\t" \
- "slt %[temp17], %[" #TEMP4 "], $zero \n\t" \
- "slt %[temp18], %[" #TEMP8 "], $zero \n\t" \
- "slt %[temp19], %[" #TEMP12 "], $zero \n\t" \
- "movn %[" #TEMP0 "], $zero, %[temp16] \n\t" \
- "movn %[" #TEMP4 "], $zero, %[temp17] \n\t" \
- "movn %[" #TEMP8 "], $zero, %[temp18] \n\t" \
- "movn %[" #TEMP12 "], $zero, %[temp19] \n\t" \
- "addiu %[temp20], $zero, 255 \n\t" \
- "slt %[temp16], %[" #TEMP0 "], %[temp20] \n\t" \
- "slt %[temp17], %[" #TEMP4 "], %[temp20] \n\t" \
- "slt %[temp18], %[" #TEMP8 "], %[temp20] \n\t" \
- "slt %[temp19], %[" #TEMP12 "], %[temp20] \n\t" \
- "movz %[" #TEMP0 "], %[temp20], %[temp16] \n\t" \
- "movz %[" #TEMP4 "], %[temp20], %[temp17] \n\t" \
- "lw %[temp16], 8(%[args]) \n\t" \
- "movz %[" #TEMP8 "], %[temp20], %[temp18] \n\t" \
- "movz %[" #TEMP12 "], %[temp20], %[temp19] \n\t" \
- "sb %[" #TEMP0 "], " #A "(%[temp16]) \n\t" \
- "sb %[" #TEMP4 "], " #B "(%[temp16]) \n\t" \
- "sb %[" #TEMP8 "], " #C "(%[temp16]) \n\t" \
- "sb %[" #TEMP12 "], " #D "(%[temp16]) \n\t"
+#define HORIZONTAL_PASS(A, TEMP0, TEMP4, TEMP8, TEMP12) \
+ "addiu %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
+ "addu %[temp16], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
+ "subu %[temp17], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
+ "mul %[" #TEMP0 "], %[" #TEMP4 "], %[kC2] \n\t" \
+ "mul %[" #TEMP8 "], %[" #TEMP12 "], %[kC1] \n\t" \
+ "mul %[" #TEMP4 "], %[" #TEMP4 "], %[kC1] \n\t" \
+ "mul %[" #TEMP12 "], %[" #TEMP12 "], %[kC2] \n\t" \
+ "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \
+ "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
+ "sra %[" #TEMP4 "], %[" #TEMP4 "], 16 \n\t" \
+ "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
+ "subu %[temp18], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
+ "addu %[temp19], %[" #TEMP4 "], %[" #TEMP12 "] \n\t" \
+ "addu %[" #TEMP0 "], %[temp16], %[temp19] \n\t" \
+ "addu %[" #TEMP4 "], %[temp17], %[temp18] \n\t" \
+ "subu %[" #TEMP8 "], %[temp17], %[temp18] \n\t" \
+ "subu %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
+ "lw %[temp20], 0(%[args]) \n\t" \
+ "sra %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \
+ "sra %[" #TEMP4 "], %[" #TEMP4 "], 3 \n\t" \
+ "sra %[" #TEMP8 "], %[" #TEMP8 "], 3 \n\t" \
+ "sra %[" #TEMP12 "], %[" #TEMP12 "], 3 \n\t" \
+ "lbu %[temp16], 0+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \
+ "lbu %[temp17], 1+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \
+ "lbu %[temp18], 2+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \
+ "lbu %[temp19], 3+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \
+ "addu %[" #TEMP0 "], %[temp16], %[" #TEMP0 "] \n\t" \
+ "addu %[" #TEMP4 "], %[temp17], %[" #TEMP4 "] \n\t" \
+ "addu %[" #TEMP8 "], %[temp18], %[" #TEMP8 "] \n\t" \
+ "addu %[" #TEMP12 "], %[temp19], %[" #TEMP12 "] \n\t" \
+ "slt %[temp16], %[" #TEMP0 "], $zero \n\t" \
+ "slt %[temp17], %[" #TEMP4 "], $zero \n\t" \
+ "slt %[temp18], %[" #TEMP8 "], $zero \n\t" \
+ "slt %[temp19], %[" #TEMP12 "], $zero \n\t" \
+ "movn %[" #TEMP0 "], $zero, %[temp16] \n\t" \
+ "movn %[" #TEMP4 "], $zero, %[temp17] \n\t" \
+ "movn %[" #TEMP8 "], $zero, %[temp18] \n\t" \
+ "movn %[" #TEMP12 "], $zero, %[temp19] \n\t" \
+ "addiu %[temp20], $zero, 255 \n\t" \
+ "slt %[temp16], %[" #TEMP0 "], %[temp20] \n\t" \
+ "slt %[temp17], %[" #TEMP4 "], %[temp20] \n\t" \
+ "slt %[temp18], %[" #TEMP8 "], %[temp20] \n\t" \
+ "slt %[temp19], %[" #TEMP12 "], %[temp20] \n\t" \
+ "movz %[" #TEMP0 "], %[temp20], %[temp16] \n\t" \
+ "movz %[" #TEMP4 "], %[temp20], %[temp17] \n\t" \
+ "lw %[temp16], 8(%[args]) \n\t" \
+ "movz %[" #TEMP8 "], %[temp20], %[temp18] \n\t" \
+ "movz %[" #TEMP12 "], %[temp20], %[temp19] \n\t" \
+ "sb %[" #TEMP0 "], 0+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \
+ "sb %[" #TEMP4 "], 1+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \
+ "sb %[" #TEMP8 "], 2+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \
+ "sb %[" #TEMP12 "], 3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
// Does one or two inverse transforms.
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
@@ -130,10 +127,10 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
VERTICAL_PASS(4, 20, 12, 28, temp12, temp8, temp9, temp10, temp11)
VERTICAL_PASS(6, 22, 14, 30, temp20, temp12, temp13, temp14, temp15)
- HORIZONTAL_PASS( 0, 1, 2, 3, temp0, temp4, temp8, temp12)
- HORIZONTAL_PASS(16, 17, 18, 19, temp1, temp5, temp9, temp13)
- HORIZONTAL_PASS(32, 33, 34, 35, temp2, temp6, temp10, temp14)
- HORIZONTAL_PASS(48, 49, 50, 51, temp3, temp7, temp11, temp15)
+ HORIZONTAL_PASS(0, temp0, temp4, temp8, temp12)
+ HORIZONTAL_PASS(1, temp1, temp5, temp9, temp13)
+ HORIZONTAL_PASS(2, temp2, temp6, temp10, temp14)
+ HORIZONTAL_PASS(3, temp3, temp7, temp11, temp15)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
@@ -241,46 +238,54 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
return 0;
}
+static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+ const VP8Matrix* const mtx) {
+ int nz;
+ nz = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
+ nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
+ return nz;
+}
+
#undef QUANTIZE_ONE
// macro for one horizontal pass in Disto4x4 (TTransform)
// two calls of function TTransform are merged into single one
-// A..D - offsets in bytes to load from a and b buffers
+// A - row index: a/b bytes are loaded from offsets BPS * A + 0..3
// E..H - offsets in bytes to store first results to tmp buffer
// E1..H1 - offsets in bytes to store second results to tmp buffer
-#define HORIZONTAL_PASS(A, B, C, D, E, F, G, H, E1, F1, G1, H1) \
- "lbu %[temp0], " #A "(%[a]) \n\t" \
- "lbu %[temp1], " #B "(%[a]) \n\t" \
- "lbu %[temp2], " #C "(%[a]) \n\t" \
- "lbu %[temp3], " #D "(%[a]) \n\t" \
- "lbu %[temp4], " #A "(%[b]) \n\t" \
- "lbu %[temp5], " #B "(%[b]) \n\t" \
- "lbu %[temp6], " #C "(%[b]) \n\t" \
- "lbu %[temp7], " #D "(%[b]) \n\t" \
- "addu %[temp8], %[temp0], %[temp2] \n\t" \
- "subu %[temp0], %[temp0], %[temp2] \n\t" \
- "addu %[temp2], %[temp1], %[temp3] \n\t" \
- "subu %[temp1], %[temp1], %[temp3] \n\t" \
- "addu %[temp3], %[temp4], %[temp6] \n\t" \
- "subu %[temp4], %[temp4], %[temp6] \n\t" \
- "addu %[temp6], %[temp5], %[temp7] \n\t" \
- "subu %[temp5], %[temp5], %[temp7] \n\t" \
- "addu %[temp7], %[temp8], %[temp2] \n\t" \
- "subu %[temp2], %[temp8], %[temp2] \n\t" \
- "addu %[temp8], %[temp0], %[temp1] \n\t" \
- "subu %[temp0], %[temp0], %[temp1] \n\t" \
- "addu %[temp1], %[temp3], %[temp6] \n\t" \
- "subu %[temp3], %[temp3], %[temp6] \n\t" \
- "addu %[temp6], %[temp4], %[temp5] \n\t" \
- "subu %[temp4], %[temp4], %[temp5] \n\t" \
- "sw %[temp7], " #E "(%[tmp]) \n\t" \
- "sw %[temp2], " #H "(%[tmp]) \n\t" \
- "sw %[temp8], " #F "(%[tmp]) \n\t" \
- "sw %[temp0], " #G "(%[tmp]) \n\t" \
- "sw %[temp1], " #E1 "(%[tmp]) \n\t" \
- "sw %[temp3], " #H1 "(%[tmp]) \n\t" \
- "sw %[temp6], " #F1 "(%[tmp]) \n\t" \
- "sw %[temp4], " #G1 "(%[tmp]) \n\t"
+#define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1) \
+ "lbu %[temp0], 0+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
+ "lbu %[temp1], 1+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
+ "lbu %[temp2], 2+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
+ "lbu %[temp3], 3+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
+ "lbu %[temp4], 0+" XSTR(BPS) "*" #A "(%[b]) \n\t" \
+ "lbu %[temp5], 1+" XSTR(BPS) "*" #A "(%[b]) \n\t" \
+ "lbu %[temp6], 2+" XSTR(BPS) "*" #A "(%[b]) \n\t" \
+ "lbu %[temp7], 3+" XSTR(BPS) "*" #A "(%[b]) \n\t" \
+ "addu %[temp8], %[temp0], %[temp2] \n\t" \
+ "subu %[temp0], %[temp0], %[temp2] \n\t" \
+ "addu %[temp2], %[temp1], %[temp3] \n\t" \
+ "subu %[temp1], %[temp1], %[temp3] \n\t" \
+ "addu %[temp3], %[temp4], %[temp6] \n\t" \
+ "subu %[temp4], %[temp4], %[temp6] \n\t" \
+ "addu %[temp6], %[temp5], %[temp7] \n\t" \
+ "subu %[temp5], %[temp5], %[temp7] \n\t" \
+ "addu %[temp7], %[temp8], %[temp2] \n\t" \
+ "subu %[temp2], %[temp8], %[temp2] \n\t" \
+ "addu %[temp8], %[temp0], %[temp1] \n\t" \
+ "subu %[temp0], %[temp0], %[temp1] \n\t" \
+ "addu %[temp1], %[temp3], %[temp6] \n\t" \
+ "subu %[temp3], %[temp3], %[temp6] \n\t" \
+ "addu %[temp6], %[temp4], %[temp5] \n\t" \
+ "subu %[temp4], %[temp4], %[temp5] \n\t" \
+ "sw %[temp7], " #E "(%[tmp]) \n\t" \
+ "sw %[temp2], " #H "(%[tmp]) \n\t" \
+ "sw %[temp8], " #F "(%[tmp]) \n\t" \
+ "sw %[temp0], " #G "(%[tmp]) \n\t" \
+ "sw %[temp1], " #E1 "(%[tmp]) \n\t" \
+ "sw %[temp3], " #H1 "(%[tmp]) \n\t" \
+ "sw %[temp6], " #F1 "(%[tmp]) \n\t" \
+ "sw %[temp4], " #G1 "(%[tmp]) \n\t"
// macro for one vertical pass in Disto4x4 (TTransform)
// two calls of function TTransform are merged into single one
@@ -362,10 +367,10 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
__asm__ volatile(
- HORIZONTAL_PASS( 0, 1, 2, 3, 0, 4, 8, 12, 64, 68, 72, 76)
- HORIZONTAL_PASS(16, 17, 18, 19, 16, 20, 24, 28, 80, 84, 88, 92)
- HORIZONTAL_PASS(32, 33, 34, 35, 32, 36, 40, 44, 96, 100, 104, 108)
- HORIZONTAL_PASS(48, 49, 50, 51, 48, 52, 56, 60, 112, 116, 120, 124)
+ HORIZONTAL_PASS(0, 0, 4, 8, 12, 64, 68, 72, 76)
+ HORIZONTAL_PASS(1, 16, 20, 24, 28, 80, 84, 88, 92)
+ HORIZONTAL_PASS(2, 32, 36, 40, 44, 96, 100, 104, 108)
+ HORIZONTAL_PASS(3, 48, 52, 56, 60, 112, 116, 120, 124)
"mthi $zero \n\t"
"mtlo $zero \n\t"
VERTICAL_PASS( 0, 16, 32, 48, 64, 80, 96, 112, 0, 8, 16, 24)
@@ -405,41 +410,41 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
// macro for one horizontal pass in FTransform
// temp0..temp15 holds tmp[0]..tmp[15]
-// A..D - offsets in bytes to load from src and ref buffers
+// A - row index: src/ref bytes are loaded from offsets BPS * A + 0..3
// TEMP0..TEMP3 - registers for corresponding tmp elements
-#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP1, TEMP2, TEMP3) \
- "lw %[" #TEMP1 "], 0(%[args]) \n\t" \
- "lw %[" #TEMP2 "], 4(%[args]) \n\t" \
- "lbu %[temp16], " #A "(%[" #TEMP1 "]) \n\t" \
- "lbu %[temp17], " #A "(%[" #TEMP2 "]) \n\t" \
- "lbu %[temp18], " #B "(%[" #TEMP1 "]) \n\t" \
- "lbu %[temp19], " #B "(%[" #TEMP2 "]) \n\t" \
- "subu %[temp20], %[temp16], %[temp17] \n\t" \
- "lbu %[temp16], " #C "(%[" #TEMP1 "]) \n\t" \
- "lbu %[temp17], " #C "(%[" #TEMP2 "]) \n\t" \
- "subu %[" #TEMP0 "], %[temp18], %[temp19] \n\t" \
- "lbu %[temp18], " #D "(%[" #TEMP1 "]) \n\t" \
- "lbu %[temp19], " #D "(%[" #TEMP2 "]) \n\t" \
- "subu %[" #TEMP1 "], %[temp16], %[temp17] \n\t" \
- "subu %[" #TEMP2 "], %[temp18], %[temp19] \n\t" \
- "addu %[" #TEMP3 "], %[temp20], %[" #TEMP2 "] \n\t" \
- "subu %[" #TEMP2 "], %[temp20], %[" #TEMP2 "] \n\t" \
- "addu %[temp20], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
- "subu %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
- "mul %[temp16], %[" #TEMP2 "], %[c5352] \n\t" \
- "mul %[temp17], %[" #TEMP2 "], %[c2217] \n\t" \
- "mul %[temp18], %[" #TEMP0 "], %[c5352] \n\t" \
- "mul %[temp19], %[" #TEMP0 "], %[c2217] \n\t" \
- "addu %[" #TEMP1 "], %[" #TEMP3 "], %[temp20] \n\t" \
- "subu %[temp20], %[" #TEMP3 "], %[temp20] \n\t" \
- "sll %[" #TEMP0 "], %[" #TEMP1 "], 3 \n\t" \
- "sll %[" #TEMP2 "], %[temp20], 3 \n\t" \
- "addiu %[temp16], %[temp16], 1812 \n\t" \
- "addiu %[temp17], %[temp17], 937 \n\t" \
- "addu %[temp16], %[temp16], %[temp19] \n\t" \
- "subu %[temp17], %[temp17], %[temp18] \n\t" \
- "sra %[" #TEMP1 "], %[temp16], 9 \n\t" \
- "sra %[" #TEMP3 "], %[temp17], 9 \n\t"
+#define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \
+ "lw %[" #TEMP1 "], 0(%[args]) \n\t" \
+ "lw %[" #TEMP2 "], 4(%[args]) \n\t" \
+ "lbu %[temp16], 0+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \
+ "lbu %[temp17], 0+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \
+ "lbu %[temp18], 1+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \
+ "lbu %[temp19], 1+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \
+ "subu %[temp20], %[temp16], %[temp17] \n\t" \
+ "lbu %[temp16], 2+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \
+ "lbu %[temp17], 2+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \
+ "subu %[" #TEMP0 "], %[temp18], %[temp19] \n\t" \
+ "lbu %[temp18], 3+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \
+ "lbu %[temp19], 3+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \
+ "subu %[" #TEMP1 "], %[temp16], %[temp17] \n\t" \
+ "subu %[" #TEMP2 "], %[temp18], %[temp19] \n\t" \
+ "addu %[" #TEMP3 "], %[temp20], %[" #TEMP2 "] \n\t" \
+ "subu %[" #TEMP2 "], %[temp20], %[" #TEMP2 "] \n\t" \
+ "addu %[temp20], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
+ "subu %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
+ "mul %[temp16], %[" #TEMP2 "], %[c5352] \n\t" \
+ "mul %[temp17], %[" #TEMP2 "], %[c2217] \n\t" \
+ "mul %[temp18], %[" #TEMP0 "], %[c5352] \n\t" \
+ "mul %[temp19], %[" #TEMP0 "], %[c2217] \n\t" \
+ "addu %[" #TEMP1 "], %[" #TEMP3 "], %[temp20] \n\t" \
+ "subu %[temp20], %[" #TEMP3 "], %[temp20] \n\t" \
+ "sll %[" #TEMP0 "], %[" #TEMP1 "], 3 \n\t" \
+ "sll %[" #TEMP2 "], %[temp20], 3 \n\t" \
+ "addiu %[temp16], %[temp16], 1812 \n\t" \
+ "addiu %[temp17], %[temp17], 937 \n\t" \
+ "addu %[temp16], %[temp16], %[temp19] \n\t" \
+ "subu %[temp17], %[temp17], %[temp18] \n\t" \
+ "sra %[" #TEMP1 "], %[temp16], 9 \n\t" \
+ "sra %[" #TEMP3 "], %[temp17], 9 \n\t"
// macro for one vertical pass in FTransform
// temp0..temp15 holds tmp[0]..tmp[15]
@@ -483,10 +488,10 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
{ (const int*)src, (const int*)ref, (const int*)out };
__asm__ volatile(
- HORIZONTAL_PASS( 0, 1, 2, 3, temp0, temp1, temp2, temp3)
- HORIZONTAL_PASS(16, 17, 18, 19, temp4, temp5, temp6, temp7)
- HORIZONTAL_PASS(32, 33, 34, 35, temp8, temp9, temp10, temp11)
- HORIZONTAL_PASS(48, 49, 50, 51, temp12, temp13, temp14, temp15)
+ HORIZONTAL_PASS(0, temp0, temp1, temp2, temp3)
+ HORIZONTAL_PASS(1, temp4, temp5, temp6, temp7)
+ HORIZONTAL_PASS(2, temp8, temp9, temp10, temp11)
+ HORIZONTAL_PASS(3, temp12, temp13, temp14, temp15)
"lw %[temp20], 8(%[args]) \n\t"
VERTICAL_PASS(0, 8, 16, 24, temp0, temp4, temp8, temp12)
VERTICAL_PASS(2, 10, 18, 26, temp1, temp5, temp9, temp13)
@@ -508,118 +513,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
#undef VERTICAL_PASS
#undef HORIZONTAL_PASS
-// Forward declaration.
-extern int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res);
-
-int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res) {
- int n = res->first;
- // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
- int p0 = res->prob[n][ctx0][0];
- const uint16_t* t = res->cost[n][ctx0];
- int cost;
- const int const_2 = 2;
- const int const_255 = 255;
- const int const_max_level = MAX_VARIABLE_LEVEL;
- int res_cost;
- int res_prob;
- int res_coeffs;
- int res_last;
- int v_reg;
- int b_reg;
- int ctx_reg;
- int cost_add, temp_1, temp_2, temp_3;
-
- if (res->last < 0) {
- return VP8BitCost(0, p0);
- }
-
- cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
-
- res_cost = (int)res->cost;
- res_prob = (int)res->prob;
- res_coeffs = (int)res->coeffs;
- res_last = (int)res->last;
-
- __asm__ volatile(
- ".set push \n\t"
- ".set noreorder \n\t"
-
- "sll %[temp_1], %[n], 1 \n\t"
- "addu %[res_coeffs], %[res_coeffs], %[temp_1] \n\t"
- "slt %[temp_2], %[n], %[res_last] \n\t"
- "bnez %[temp_2], 1f \n\t"
- " li %[cost_add], 0 \n\t"
- "b 2f \n\t"
- " nop \n\t"
- "1: \n\t"
- "lh %[v_reg], 0(%[res_coeffs]) \n\t"
- "addu %[b_reg], %[n], %[VP8EncBands] \n\t"
- "move %[temp_1], %[const_max_level] \n\t"
- "addu %[cost], %[cost], %[cost_add] \n\t"
- "negu %[temp_2], %[v_reg] \n\t"
- "slti %[temp_3], %[v_reg], 0 \n\t"
- "movn %[v_reg], %[temp_2], %[temp_3] \n\t"
- "lbu %[b_reg], 1(%[b_reg]) \n\t"
- "li %[cost_add], 0 \n\t"
-
- "sltiu %[temp_3], %[v_reg], 2 \n\t"
- "move %[ctx_reg], %[v_reg] \n\t"
- "movz %[ctx_reg], %[const_2], %[temp_3] \n\t"
- // cost += VP8LevelCost(t, v);
- "slt %[temp_3], %[v_reg], %[const_max_level] \n\t"
- "movn %[temp_1], %[v_reg], %[temp_3] \n\t"
- "sll %[temp_2], %[v_reg], 1 \n\t"
- "addu %[temp_2], %[temp_2], %[VP8LevelFixedCosts] \n\t"
- "lhu %[temp_2], 0(%[temp_2]) \n\t"
- "sll %[temp_1], %[temp_1], 1 \n\t"
- "addu %[temp_1], %[temp_1], %[t] \n\t"
- "lhu %[temp_3], 0(%[temp_1]) \n\t"
- "addu %[cost], %[cost], %[temp_2] \n\t"
-
- // t = res->cost[b][ctx];
- "sll %[temp_1], %[ctx_reg], 7 \n\t"
- "sll %[temp_2], %[ctx_reg], 3 \n\t"
- "addu %[cost], %[cost], %[temp_3] \n\t"
- "addu %[temp_1], %[temp_1], %[temp_2] \n\t"
- "sll %[temp_2], %[b_reg], 3 \n\t"
- "sll %[temp_3], %[b_reg], 5 \n\t"
- "sub %[temp_2], %[temp_3], %[temp_2] \n\t"
- "sll %[temp_3], %[temp_2], 4 \n\t"
- "addu %[temp_1], %[temp_1], %[temp_3] \n\t"
- "addu %[temp_2], %[temp_2], %[res_cost] \n\t"
- "addiu %[n], %[n], 1 \n\t"
- "addu %[t], %[temp_1], %[temp_2] \n\t"
- "slt %[temp_1], %[n], %[res_last] \n\t"
- "bnez %[temp_1], 1b \n\t"
- " addiu %[res_coeffs], %[res_coeffs], 2 \n\t"
- "2: \n\t"
-
- ".set pop \n\t"
- : [cost]"+r"(cost), [t]"+r"(t), [n]"+r"(n), [v_reg]"=&r"(v_reg),
- [ctx_reg]"=&r"(ctx_reg), [b_reg]"=&r"(b_reg), [cost_add]"=&r"(cost_add),
- [temp_1]"=&r"(temp_1), [temp_2]"=&r"(temp_2), [temp_3]"=&r"(temp_3)
- : [const_2]"r"(const_2), [const_255]"r"(const_255), [res_last]"r"(res_last),
- [VP8EntropyCost]"r"(VP8EntropyCost), [VP8EncBands]"r"(VP8EncBands),
- [const_max_level]"r"(const_max_level), [res_prob]"r"(res_prob),
- [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_coeffs]"r"(res_coeffs),
- [res_cost]"r"(res_cost)
- : "memory"
- );
-
- // Last coefficient is always non-zero
- {
- const int v = abs(res->coeffs[n]);
- assert(v != 0);
- cost += VP8LevelCost(t, v);
- if (n < 15) {
- const int b = VP8EncBands[n + 1];
- const int ctx = (v == 1) ? 1 : 2;
- const int last_p0 = res->prob[b][ctx][0];
- cost += VP8BitCost(0, last_p0);
- }
- }
- return cost;
-}
+#if !defined(WORK_AROUND_GCC)
#define GET_SSE_INNER(A, B, C, D) \
"lbu %[temp0], " #A "(%[a]) \n\t" \
@@ -645,7 +539,6 @@ int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res) {
GET_SSE_INNER(C, C + 1, C + 2, C + 3) \
GET_SSE_INNER(D, D + 1, D + 2, D + 3)
-#if !defined(WORK_AROUND_GCC)
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
int count;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
@@ -653,29 +546,29 @@ static int SSE16x16(const uint8_t* a, const uint8_t* b) {
__asm__ volatile(
"mult $zero, $zero \n\t"
- GET_SSE( 0, 4, 8, 12)
- GET_SSE( 16, 20, 24, 28)
- GET_SSE( 32, 36, 40, 44)
- GET_SSE( 48, 52, 56, 60)
- GET_SSE( 64, 68, 72, 76)
- GET_SSE( 80, 84, 88, 92)
- GET_SSE( 96, 100, 104, 108)
- GET_SSE(112, 116, 120, 124)
- GET_SSE(128, 132, 136, 140)
- GET_SSE(144, 148, 152, 156)
- GET_SSE(160, 164, 168, 172)
- GET_SSE(176, 180, 184, 188)
- GET_SSE(192, 196, 200, 204)
- GET_SSE(208, 212, 216, 220)
- GET_SSE(224, 228, 232, 236)
- GET_SSE(240, 244, 248, 252)
+ GET_SSE( 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)
+ GET_SSE( 1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)
+ GET_SSE( 2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)
+ GET_SSE( 3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)
+ GET_SSE( 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)
+ GET_SSE( 5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)
+ GET_SSE( 6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)
+ GET_SSE( 7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)
+ GET_SSE( 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS)
+ GET_SSE( 9 * BPS, 4 + 9 * BPS, 8 + 9 * BPS, 12 + 9 * BPS)
+ GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)
+ GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)
+ GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)
+ GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)
+ GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)
+ GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
- : "memory", "hi" , "lo"
+ : "memory", "hi", "lo"
);
return count;
}
@@ -687,21 +580,21 @@ static int SSE16x8(const uint8_t* a, const uint8_t* b) {
__asm__ volatile(
"mult $zero, $zero \n\t"
- GET_SSE( 0, 4, 8, 12)
- GET_SSE( 16, 20, 24, 28)
- GET_SSE( 32, 36, 40, 44)
- GET_SSE( 48, 52, 56, 60)
- GET_SSE( 64, 68, 72, 76)
- GET_SSE( 80, 84, 88, 92)
- GET_SSE( 96, 100, 104, 108)
- GET_SSE(112, 116, 120, 124)
+ GET_SSE( 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)
+ GET_SSE( 1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)
+ GET_SSE( 2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)
+ GET_SSE( 3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)
+ GET_SSE( 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)
+ GET_SSE( 5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)
+ GET_SSE( 6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)
+ GET_SSE( 7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
- : "memory", "hi" , "lo"
+ : "memory", "hi", "lo"
);
return count;
}
@@ -713,17 +606,17 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
__asm__ volatile(
"mult $zero, $zero \n\t"
- GET_SSE( 0, 4, 16, 20)
- GET_SSE(32, 36, 48, 52)
- GET_SSE(64, 68, 80, 84)
- GET_SSE(96, 100, 112, 116)
+ GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)
+ GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)
+ GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)
+ GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
- : "memory", "hi" , "lo"
+ : "memory", "hi", "lo"
);
return count;
}
@@ -735,42 +628,45 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
__asm__ volatile(
"mult $zero, $zero \n\t"
- GET_SSE(0, 16, 32, 48)
+ GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
- : "memory", "hi" , "lo"
+ : "memory", "hi", "lo"
);
return count;
}
-#endif // WORK_AROUND_GCC
+#undef GET_SSE
+#undef GET_SSE_INNER
-#undef GET_SSE_MIPS32
-#undef GET_SSE_MIPS32_INNER
-
-#endif // WEBP_USE_MIPS32
+#endif // !WORK_AROUND_GCC
//------------------------------------------------------------------------------
// Entry point
extern void VP8EncDspInitMIPS32(void);
-void VP8EncDspInitMIPS32(void) {
-#if defined(WEBP_USE_MIPS32)
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPS32(void) {
VP8ITransform = ITransform;
+ VP8FTransform = FTransform;
VP8EncQuantizeBlock = QuantizeBlock;
+ VP8EncQuantize2Blocks = Quantize2Blocks;
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
- VP8FTransform = FTransform;
#if !defined(WORK_AROUND_GCC)
VP8SSE16x16 = SSE16x16;
VP8SSE8x8 = SSE8x8;
VP8SSE16x8 = SSE16x8;
VP8SSE4x4 = SSE4x4;
#endif
-#endif // WEBP_USE_MIPS32
}
+
+#else // !WEBP_USE_MIPS32
+
+WEBP_DSP_INIT_STUB(VP8EncDspInitMIPS32)
+
+#endif // WEBP_USE_MIPS32