summaryrefslogtreecommitdiffstats
path: root/src/gui/painting/qdrawhelper_mips_dsp_asm.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/gui/painting/qdrawhelper_mips_dsp_asm.S')
-rw-r--r--src/gui/painting/qdrawhelper_mips_dsp_asm.S1303
1 files changed, 1241 insertions, 62 deletions
diff --git a/src/gui/painting/qdrawhelper_mips_dsp_asm.S b/src/gui/painting/qdrawhelper_mips_dsp_asm.S
index f426905aad..58cc176529 100644
--- a/src/gui/painting/qdrawhelper_mips_dsp_asm.S
+++ b/src/gui/painting/qdrawhelper_mips_dsp_asm.S
@@ -41,67 +41,6 @@
#include "qt_mips_asm_dsp.h"
-LEAF_MIPS_DSP(INTERPOLATE_PIXEL_255_asm_mips_dsp)
-/*
- * Per the inline comments below: treats x and y as four packed 8-bit
- * channels, computes x * a + y * b per channel, and scales the sum back
- * to 8 bits with the t + ((t >> 8) & 0xff00ff) + 0x800080, >> 8 sequence.
- * The even bytes (0 and 2) and the odd bytes (1 and 3) are processed as
- * two separate pairs of 16-bit lanes and merged at the end.
- *
- * a0 - uint x (First value to multiply)
- * a1 - uint a (Multiplicator byte for first value)
- * a2 - uint y (Second value to multiply)
- * a3 - uint b (Multiplicator byte for second value)
- *
- * Result is returned in v0. Scratch registers: t0-t4.
- */
-
- .set reorder
- li t4, 8388736 /* t4 = 0x800080 (rounding constant) */
- preceu.ph.qbra t0, a0 /* (x & 0xff00ff) */
- mul t0, t0, a1 /* (x & 0xff00ff) * a */
- preceu.ph.qbra t1, a2 /* (y & 0xff00ff) */
- mul t1, t1, a3 /* (y & 0xff00ff) * b */
- addu t0, t0, t1 /* (x & 0xff00ff) * a +
- * (y & 0xff00ff) * b
- */
- preceu.ph.qbla t1, t0 /* (t >> 8) & 0xff00ff */
- addu t0, t0, t1 /* t + ((t >> 8) & 0xff00ff) */
- addu t0, t0, t4 /* t + ((t >> 8) & 0xff00ff) + 0x800080 */
- preceu.ph.qbla t0, t0 /* t >> 8 and t&=0xff00ff */
- preceu.ph.qbla t2, a0 /* (x>>8) & 0xff00ff */
- mul t2, t2, a1 /* ((x>>8) & 0xff00ff) * a */
- preceu.ph.qbla t3, a2 /* ((y>>8) & 0xff00ff) */
- mul t3, t3, a3 /* ((y>>8) & 0xff00ff) * b */
- addu t2, t2, t3 /* ((x>>8) & 0xff00ff) * a +
- * ((y >> 8) & 0xff00ff) * b
- */
- preceu.ph.qbla t3, t2 /* (t >> 8) & 0xff00ff, t being the odd-byte sum in t2 */
- addu t2, t2, t3 /* t + ((t >> 8) & 0xff00ff) */
- addu t2, t2, t4 /* t + ((t >> 8) & 0xff00ff) + 0x800080 */
- and t2, t2, 0xff00ff00 /* keep the odd result bytes in place */
- or t1, t0, t2 /* merge even and odd result bytes */
- move v0, t1
- j ra
-
-END(INTERPOLATE_PIXEL_255_asm_mips_dsp)
-
-LEAF_MIPS_DSP(BYTE_MUL_asm_mips_dsp)
-/*
- * Multiplies each of the four bytes of x by the byte a and scales the
- * 16-bit products back to 8 bits with the
- * t + (t >> 8) + 0x80, >> 8 sequence; the packed result is returned in v0.
- *
- * a0 - uint x (Value to multiply)
- * a1 - uint a (Multiplicator byte)
- *
- * Scratch registers: t0-t4; a1 is overwritten.
- */
-
- .set reorder
- replv.ph a1, a1 /* a1 = a replicated into both halfwords (0x00aa00aa) */
- li t4, 8388736 /* t4 = 0x800080 */
- muleu_s.ph.qbl t0, a0, a1 /* t0 = products of the two upper bytes of x */
- muleu_s.ph.qbr t2, a0, a1 /* t2 = products of the two lower bytes of x */
- preceu.ph.qbla t1, t0 /* (t0 >> 8) & 0xff00ff */
- addu t0, t0, t1
- addu t0, t0, t4
- preceu.ph.qbla t3, t2 /* (t2 >> 8) & 0xff00ff */
- addu t2, t2, t3
- addu t2, t2, t4
- precrq.qb.ph t4, t0, t2 /* pack the high byte of every halfword of t0:t2 */
- move v0, t4
- j ra
-
-END(BYTE_MUL_asm_mips_dsp)
-
LEAF_MIPS_DSP(destfetchARGB32_asm_mips_dsp)
/*
* a0 - buffer address (dst)
@@ -349,7 +288,7 @@ LEAF_MIPS_DSP(comp_func_SourceOver_asm_mips_dsp)
END(comp_func_SourceOver_asm_mips_dsp)
-LEAF_MIPS_DSP(qt_destStoreARGB32_asm_mips_dsp)
+LEAF_MIPS_DSPR2(qt_destStoreARGB32_asm_mips_dsp)
/*
* a0 - uint * data
* a1 - const uint *buffer
@@ -422,3 +361,1243 @@ LEAF_MIPS_DSP(qt_destStoreARGB32_asm_mips_dsp)
nop
END(qt_destStoreARGB32_asm_mips_dsp)
+
+LEAF_MIPS_DSP(comp_func_solid_Source_dsp_asm_x2)
+/*
+ * Two pixels per iteration: each dest word is replaced by
+ * color + BYTE_MUL_x2(dest, ialpha). BYTE_MUL_x2 is a macro from
+ * qt_mips_asm_dsp.h (not visible here); its results are read back from
+ * t6/t7 below.
+ *
+ * a0 - const uint *dest
+ * a1 - int length
+ * a2 - uint color
+ * a3 - uint ialpha
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * Scratch registers: t0-t7, t9; a3 is overwritten.
+ */
+
+ beqz a1, 2f /* nothing to do for an empty span */
+ nop
+ replv.ph a3, a3 /* replicate ialpha into both halfwords */
+ li t9, 8388736 /* t9 = 0x800080 */
+1:
+ lw t0, 0(a0)
+ lw t1, 4(a0)
+ or t2, t0, t1 /* if both dest are zero, no computation needed */
+ beqz t2, 12f
+ addiu a1, -2 /* delay slot: two pixels consumed on either path */
+
+ BYTE_MUL_x2 t0, t1, t6, t7, a3, a3, t9, t2, t3, t4, t5, 0
+11:
+ addu t2, a2, t6
+ addu t3, a2, t7
+ sw t2, 0(a0)
+ sw t3, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8 /* delay slot: advance dest */
+ b 2f
+ nop /* explicit delay slot: previously the first
+ * instruction of the 12: path executed here */
+12:
+ addu t2, a2, t0 /* t0 == t1 == 0 here, so the result is color */
+ addu t3, a2, t1
+ sw t2, 0(a0)
+ sw t3, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8 /* delay slot: advance dest */
+2:
+ jr ra
+ nop
+
+END(comp_func_solid_Source_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_DestinationOver_dsp_asm_x2)
+/*
+ * Two pixels per iteration: dest = dest + BYTE_MUL_x2(color, ~dest >> 24).
+ * BYTE_MUL_x2 is a macro from qt_mips_asm_dsp.h (not visible here); its
+ * results are read back from t8/a3 below.
+ *
+ * a0 - uint *dest
+ * a1 - int length
+ * a2 - uint color
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * Only caller-saved registers are used (t0-t9, a3), so no stack frame is
+ * needed.
+ */
+
+ beqz a1, 2f /* empty span */
+ nop
+ beqz a2, 2f /* color == 0 leaves dest unchanged */
+ nop
+ li t9, 8388736 /* t9 = 0x800080 */
+
+1:
+ lw t0, 0(a0)
+ lw t1, 4(a0)
+ not t2, t0
+ not t3, t1
+ srl t4, t2, 24 /* t4 = top byte of ~dest 1 */
+ srl t5, t3, 24 /* t5 = top byte of ~dest 2 */
+ or t2, t4, t5 /* both inverted alphas zero: dest is stored back unchanged */
+ beqz t2, 11f
+ addiu a1, -2 /* delay slot: two pixels consumed on either path */
+ replv.ph t2, t4
+ replv.ph t3, t5
+
+ BYTE_MUL_x2 a2, a2, t8, a3, t2, t3, t9, t4, t5, t6, t7
+
+ addu t0, t0, t8
+ addu t1, t1, a3
+11:
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8 /* delay slot: advance dest */
+
+2:
+ jr ra
+ nop
+
+END(comp_func_solid_DestinationOver_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_DestinationOver_dsp_asm_x2)
+/*
+ * Two pixels per iteration: dest = dest + BYTE_MUL_x2(s, ~dest >> 24),
+ * where s is the source word, first run through
+ * BYTE_MUL_x2(src, const_alpha) when const_alpha != 255.
+ * BYTE_MUL_x2 is a macro from qt_mips_asm_dsp.h (not visible here).
+ *
+ * a0 - uint *dest
+ * a1 - uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0/s1 are saved on an 8-byte frame; AT is used as a data register,
+ * hence .set noat.
+ */
+
+ .set noat
+ addiu sp, sp, -8
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 */
+ li t0, 0xff
+ beq a3, t0, 2f
+ nop
+
+/* part where const_alpha != 255 */
+1:
+ replv.ph a3, a3
+11:
+ lw t0, 0(a1) # src_1
+ lw t1, 4(a1) # src_2
+ addiu a2, -2
+
+ BYTE_MUL_x2 t0, t1, t8, AT, a3, a3, t9, t4, t5, t6, t7, 0
+ # t8 = s1
+ # AT = s2
+ lw t0, 0(a0) # dest_1
+ lw t1, 4(a0) # dest_2
+ addiu a1, 8
+ not t2, t0
+ not t3, t1
+ srl t4, t2, 24
+ srl t5, t3, 24
+ replv.ph t2, t4 # qAlpha(~d) 1
+ replv.ph t3, t5 # qAlpha(~d) 2
+
+ BYTE_MUL_x2 t8, AT, s0, s1, t2, t3, t9, t4, t5, t6, t7
+
+ addu t0, t0, s0
+ addu t1, t1, s1
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t0, 0(a0) # dest 1
+ lw t1, 4(a0) # dest 2
+ lw s0, 0(a1) # src 1
+ lw s1, 4(a1) # src 2
+ not t2, t0
+ not t3, t1
+ srl t4, t2, 24
+ srl t5, t3, 24
+ replv.ph t2, t4
+ replv.ph t3, t5
+ addiu a1, 8
+ addiu a2, -2
+
+ BYTE_MUL_x2 s0, s1, t8, AT, t2, t3, t9, t4, t5, t6, t7
+
+ addu t0, t0, t8
+ addu t1, t1, AT
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ addiu sp, sp, 8
+ jr ra
+ nop
+ .set at
+
+END(comp_func_DestinationOver_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_SourceIn_dsp_asm_x2)
+/*
+ * Two pixels per iteration.
+ *   const_alpha == 255: dest = BYTE_MUL_x2(color, dest >> 24)
+ *   otherwise:          color = BYTE_MUL(color, const_alpha) once, then
+ *                       dest = INTERPOLATE_PIXEL_255(color, dest >> 24,
+ *                                                    dest, 255 - const_alpha)
+ * The three macros come from qt_mips_asm_dsp.h (not visible here).
+ *
+ * a0 - uint *dest
+ * a1 - int length
+ * a2 - uint color
+ * a3 - uint const_alpha
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0-s2 are saved; the frame is 16 bytes (not 12) so that sp stays
+ * doubleword aligned as the o32 ABI requires. AT is used as a data
+ * register, hence .set noat.
+ */
+
+ .set noat
+ addiu sp, sp, -16 /* 3 saved words + 4 bytes of alignment padding */
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ beqz a1, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ replv.ph t0, a3
+ li t5, 0xff
+ BYTE_MUL a2, a2, t0, t9, t1, t2, t3, t4 /* a2 = color ( = BYTE_MUL(color, const_alpha)); */
+ subu t1, t5, a3 /* t1 = cia = 255 - const_alpha */
+11:
+ lw t2, 0(a0) /* t2 = d */
+ lw s0, 4(a0) /* s0 = second d */
+ addiu a1, -2
+ srl t3, t2, 24 /* t3 = qAlpha(d) */
+ srl s2, s0, 24 /* s2 = qAlpha(second d) */
+
+ INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7
+ INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7
+
+ sw AT, 0(a0)
+ sw s1, 4(a0)
+ bnez a1, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t0, 0(a0) /* dest 1 */
+ lw t1, 4(a0) /* dest 2 */
+ srl t4, t0, 24
+ srl t5, t1, 24
+ replv.ph t2, t4
+ replv.ph t3, t5
+ addiu a1, -2
+
+ BYTE_MUL_x2 a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7
+
+ sw t8, 0(a0)
+ sw AT, 4(a0)
+ bnez a1, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ addiu sp, sp, 16
+ jr ra
+ nop
+ .set at
+
+END(comp_func_solid_SourceIn_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_SourceIn_dsp_asm_x2)
+/*
+ * Two pixels per iteration.
+ *   const_alpha == 255: dest = BYTE_MUL_x2(src, dest >> 24)
+ *   otherwise:          s = BYTE_MUL_x2(src, const_alpha), then
+ *                       dest = INTERPOLATE_PIXEL_255(s, dest >> 24,
+ *                                                    dest, 255 - const_alpha)
+ * The macros come from qt_mips_asm_dsp.h (not visible here).
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0-s3 are saved on a 16-byte frame; AT is used as a data register,
+ * hence .set noat.
+ */
+
+ .set noat
+ addiu sp, -16
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ sw s3, 12(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ li t5, 0xff
+ subu t7, t5, a3 /* t7 = cia = 255 - const_alpha */
+ replv.ph a3, a3
+11:
+ lw t0, 0(a1) /* t0 = src 1 */
+ lw t1, 4(a1) /* t1 = src 2 */
+ addiu a2, -2
+
+ BYTE_MUL_x2 t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0
+
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addiu a1, 8
+
+ srl t2, t0, 24 /* t2 = qAlpha(d) 1 */
+ srl t3, t1, 24 /* t3 = qAlpha(d) 2 */
+
+ INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3
+ INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3
+
+ sw s1, 0(a0)
+ sw s2, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a0) /* dest 1 */
+ lw t3, 4(a0) /* dest 2 */
+ lw t0, 0(a1) /* src 1 */
+ lw t1, 4(a1) /* src 2 */
+ srl t4, t2, 24
+ srl t5, t3, 24
+ replv.ph t2, t4
+ replv.ph t3, t5
+ addiu a2, -2
+
+ BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7
+
+ addiu a1, 8
+ sw t8, 0(a0)
+ sw AT, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ lw s3, 12(sp)
+ addiu sp, 16
+ jr ra
+ nop
+ .set at
+
+END(comp_func_SourceIn_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_DestinationIn_dsp_asm_x2)
+/*
+ * Two pixels per iteration: dest = BYTE_MUL_x2(dest, a).
+ * BYTE_MUL_x2 is a macro from qt_mips_asm_dsp.h (not visible here).
+ *
+ * a0 - uint *dest
+ * a1 - int length
+ * a2 - uint a
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * No stack frame; AT is used as a data register, hence .set noat.
+ */
+
+ .set noat
+ beqz a1, 2f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ replv.ph a2, a2 /* replicate a into both halfwords */
+1:
+ lw t0, 0(a0)
+ lw t1, 4(a0)
+ addiu a1, -2
+
+ BYTE_MUL_x2 t0, t1, t8, AT, a2, a2, t9, t4, t5, t6, t7, 0
+
+ sw t8, 0(a0)
+ sw AT, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8 /* delay slot: advance dest */
+2:
+ jr ra
+ nop
+ .set at
+
+END(comp_func_solid_DestinationIn_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_DestinationIn_dsp_asm_x2)
+/*
+ * Two pixels per iteration.
+ *   const_alpha == 255: dest = BYTE_MUL_x2(dest, src >> 24)
+ *   otherwise:          a = BYTE_MUL_x2(src >> 24, const_alpha)
+ *                           + (255 - const_alpha), then
+ *                       dest = BYTE_MUL_x2(dest, a)
+ * BYTE_MUL_x2 is a macro from qt_mips_asm_dsp.h (not visible here).
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0/s1 are saved on an 8-byte frame.
+ */
+
+ addiu sp, -8
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ li t0, 0xff
+ beq a3, t0, 2f
+ nop
+
+/* part where const_alpha != 255 */
+1:
+ li t5, 0xff
+ subu t8, t5, a3 /* t8 = cia = 255 - const_alpha */
+ replv.ph a3, a3
+11:
+ lw t0, 0(a1) /* t0 = src 1 */
+ lw t1, 4(a1) /* t1 = src 2 */
+ addiu a2, -2
+ srl t0, t0, 24 /* t0 = qAlpha(src 1) */
+ srl t1, t1, 24 /* t1 = qAlpha(src 2) */
+
+ BYTE_MUL_x2 t0, t1, s1, t7, a3, a3, t9, t3, t4, t5, t6, 0
+
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addu s1, s1, t8 /* a 1 */
+ addu t7, t7, t8 /* a 2 */
+ replv.ph t2, s1
+ replv.ph t3, t7
+
+ BYTE_MUL_x2 t0, t1, s1, t7, t2, t3, t9, t4, t5, t6, s0
+
+ addiu a1, 8
+ sw s1, 0(a0)
+ sw t7, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a1) /* src 1 */
+ lw t3, 4(a1) /* src 2 */
+ lw t0, 0(a0) /* dest 1 */
+ lw t1, 4(a0) /* dest 2 */
+ srl t4, t2, 24
+ srl t5, t3, 24
+ replv.ph t2, t4 /* t2 = qAlpha(src 1) */
+ replv.ph t3, t5 /* t3 = qAlpha(src 2) */
+ addiu a2, -2
+
+ BYTE_MUL_x2 t0, t1, t8, s1, t2, t3, t9, t4, t5, t6, t7
+
+ addiu a1, 8
+ sw t8, 0(a0)
+ sw s1, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ addiu sp, 8
+ jr ra
+ nop
+
+END(comp_func_DestinationIn_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_DestinationOut_dsp_asm_x2)
+/*
+ * Two pixels per iteration.
+ *   const_alpha == 255: dest = BYTE_MUL_x2(dest, ~src >> 24)
+ *   otherwise:          a = BYTE_MUL_x2(~src >> 24, const_alpha)
+ *                           + (255 - const_alpha), then
+ *                       dest = BYTE_MUL_x2(dest, a)
+ * BYTE_MUL_x2 is a macro from qt_mips_asm_dsp.h (not visible here).
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0 is saved; the frame is 8 bytes (not 4) so that sp stays doubleword
+ * aligned as the o32 ABI requires. AT is used as a data register, hence
+ * .set noat.
+ */
+
+ .set noat
+ addiu sp, sp, -8 /* 1 saved word + 4 bytes of alignment padding */
+ sw s0, 0(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ li t0, 0xff
+ beq a3, t0, 2f
+ nop
+
+/* part where const_alpha != 255 */
+1:
+ li t5, 0xff
+ subu t8, t5, a3 /* t8 = cia = 255 - const_alpha */
+ replv.ph a3, a3
+11:
+ lw t0, 0(a1) /* t0 = src 1 */
+ lw t1, 4(a1) /* t1 = src 2 */
+ not t0, t0
+ not t1, t1
+ addiu a2, -2
+ srl t0, t0, 24 /* t0 = qAlpha(~src 1) */
+ srl t1, t1, 24 /* t1 = qAlpha(~src 2) */
+
+ BYTE_MUL_x2 t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0
+
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addu AT, AT, t8 /* a 1 */
+ addu t7, t7, t8 /* a 2 */
+ replv.ph t2, AT
+ replv.ph t3, t7
+
+ BYTE_MUL_x2 t0, t1, AT, t7, t2, t3, t9, t4, t5, t6, s0
+
+ addiu a1, 8
+ sw AT, 0(a0)
+ sw t7, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a1) /* src 1 */
+ lw t3, 4(a1) /* src 2 */
+ not t2, t2
+ not t3, t3
+ lw t0, 0(a0) /* dest 1 */
+ lw t1, 4(a0) /* dest 2 */
+ srl t4, t2, 24
+ srl t5, t3, 24
+ replv.ph t2, t4 /* t2 = qAlpha(~src 1) */
+ replv.ph t3, t5 /* t3 = qAlpha(~src 2) */
+ addiu a2, -2
+
+ BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7
+
+ addiu a1, 8
+ sw t8, 0(a0)
+ sw AT, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ addiu sp, sp, 8
+ jr ra
+ nop
+ .set at
+
+END(comp_func_DestinationOut_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_SourceAtop_dsp_asm_x2)
+/*
+ * Two pixels per iteration:
+ *   dest = INTERPOLATE_PIXEL_255(color, dest >> 24, dest, sia)
+ * INTERPOLATE_PIXEL_255 is a macro from qt_mips_asm_dsp.h (not visible
+ * here).
+ *
+ * a0 - uint *dest
+ * a1 - int length
+ * a2 - uint color
+ * a3 - uint sia
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0 is saved; the frame is 8 bytes (not 4) so that sp stays doubleword
+ * aligned as the o32 ABI requires. AT is used as a data register, hence
+ * .set noat.
+ */
+
+ .set noat
+ addiu sp, sp, -8 /* 1 saved word + 4 bytes of alignment padding */
+ sw s0, 0(sp)
+ beqz a1, 2f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+1:
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addiu a1, -2
+ srl t2, t0, 24 /* t2 = qAlpha(dest 1) */
+ srl t3, t1, 24 /* t3 = qAlpha(dest 2) */
+
+ INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7
+ INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7
+
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8 /* delay slot: advance dest */
+2:
+ lw s0, 0(sp)
+ addiu sp, sp, 8
+ jr ra
+ nop
+ .set at
+
+END(comp_func_solid_SourceAtop_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_SourceAtop_dsp_asm_x2)
+/*
+ * Two pixels per iteration:
+ *   dest = INTERPOLATE_PIXEL_255(s, dest >> 24, dest, ~s >> 24)
+ * where s is the source word, first run through
+ * BYTE_MUL_x2(src, const_alpha) when const_alpha != 255.
+ * The macros come from qt_mips_asm_dsp.h (not visible here).
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0-s4 are saved; the frame is 24 bytes (not 20) so that sp stays
+ * doubleword aligned as the o32 ABI requires. AT is used as a data
+ * register, hence .set noat.
+ */
+
+ .set noat
+ addiu sp, sp, -24 /* 5 saved words + 4 bytes of alignment padding */
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ sw s3, 12(sp)
+ sw s4, 16(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ replv.ph a3, a3
+11:
+ lw AT, 0(a1) /* src 1 */
+ lw s0, 4(a1) /* src 2 */
+
+ BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
+ /* t0 = s */
+
+ lw t2, 0(a0) /* t2 = dest 1 */
+ lw t3, 4(a0) /* t3 = dest 2 */
+
+ srl t4, t2, 24 /* t4 = qAlpha(dest 1) */
+ srl t5, t3, 24
+ not t6, t0
+ not t7, t1
+ srl t6, t6, 24 /* t6 = qAlpha(~s) */
+ srl t7, t7, 24
+ addiu a2, -2
+
+ INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
+ INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4
+
+ addiu a1, 8
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a0) /* dest 1 */
+ lw t3, 4(a0) /* dest 2 */
+ lw t0, 0(a1) /* src 1 */
+ lw t1, 4(a1) /* src 2 */
+ srl t4, t2, 24
+ srl t5, t3, 24
+ not t6, t0
+ not t7, t1
+ srl t6, t6, 24
+ srl t7, t7, 24
+ addiu a2, -2
+
+ INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
+ INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4
+
+ addiu a1, 8
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ lw s3, 12(sp)
+ lw s4, 16(sp)
+ addiu sp, sp, 24
+ jr ra
+ nop
+ .set at
+
+END(comp_func_SourceAtop_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_DestinationAtop_dsp_asm_x2)
+/*
+ * Two pixels per iteration:
+ *   dest = INTERPOLATE_PIXEL_255(dest, a, color, ~dest >> 24)
+ * INTERPOLATE_PIXEL_255 is a macro from qt_mips_asm_dsp.h (not visible
+ * here).
+ *
+ * a0 - uint *dest
+ * a1 - int length
+ * a2 - uint color
+ * a3 - uint a
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0 is saved; the frame is 8 bytes (not 4) so that sp stays doubleword
+ * aligned as the o32 ABI requires. AT is used as a data register, hence
+ * .set noat.
+ */
+
+ .set noat
+ addiu sp, sp, -8 /* 1 saved word + 4 bytes of alignment padding */
+ sw s0, 0(sp)
+ beqz a1, 2f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+1:
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addiu a1, -2
+ not t2, t0
+ not t3, t1
+ srl t2, t2, 24 /* t2 = qAlpha(~(dest 1)) */
+ srl t3, t3, 24 /* t3 = qAlpha(~(dest 2)) */
+
+ INTERPOLATE_PIXEL_255 t0, a3, a2, t2, AT, t9, t8, t4, t5, t6, t7
+ INTERPOLATE_PIXEL_255 t1, a3, a2, t3, s0, t9, t8, t4, t5, t6, t7
+
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8 /* delay slot: advance dest */
+2:
+ lw s0, 0(sp)
+ addiu sp, sp, 8
+ jr ra
+ nop
+ .set at
+
+END(comp_func_solid_DestinationAtop_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_DestinationAtop_dsp_asm_x2)
+/*
+ * Two pixels per iteration.
+ *   const_alpha == 255: dest = INTERPOLATE_PIXEL_255(dest, src >> 24,
+ *                                                    src, ~dest >> 24)
+ *   otherwise:          s = BYTE_MUL_x2(src, const_alpha),
+ *                       a = (s >> 24) + (255 - const_alpha), then
+ *                       dest = INTERPOLATE_PIXEL_255(dest, a, s, ~dest >> 24)
+ * The macros come from qt_mips_asm_dsp.h (not visible here).
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0-s5 are saved on a 24-byte frame; AT is used as a data register,
+ * hence .set noat.
+ */
+
+ .set noat
+ addiu sp, -24
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ sw s3, 12(sp)
+ sw s4, 16(sp)
+ sw s5, 20(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ li s5, 0xff
+ subu s5, s5, a3 /* s5 = cia = 255 - const_alpha */
+ replv.ph a3, a3
+11:
+ lw AT, 0(a1) /* src 1 */
+ lw s0, 4(a1) /* src 2 */
+
+ BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
+ /* t0 = s */
+
+ lw t2, 0(a0) /* t2 = dest 1 */
+ lw t3, 4(a0) /* t3 = dest 2 */
+
+ not t4, t2
+ not t5, t3
+ srl t4, t4, 24 /* t4 = qAlpha(~(dest 1)) */
+ srl t5, t5, 24
+ srl t6, t0, 24
+ srl t7, t1, 24
+ addu t6, t6, s5 /* t6 = a = qAlpha(s1) + cia */
+ addu t7, t7, s5
+ addiu a2, -2
+
+ INTERPOLATE_PIXEL_255 t2, t6, t0, t4, AT, t9, t8, s1, s2, s3, s4
+ INTERPOLATE_PIXEL_255 t3, t7, t1, t5, s0, t9, t8, s1, s2, s3, s4
+
+ addiu a1, 8
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a0) /* d1 */
+ lw t3, 4(a0) /* d2 */
+ lw t0, 0(a1) /* s1 */
+ lw t1, 4(a1) /* s2 */
+ srl t4, t0, 24 /* t4 = qAlpha(s1) */
+ srl t5, t1, 24
+ not t6, t2
+ not t7, t3
+ srl t6, t6, 24 /* qAlpha(~d1) */
+ srl t7, t7, 24
+ addiu a2, -2
+
+ INTERPOLATE_PIXEL_255 t2, t4, t0, t6, AT, t9, t8, s1, s2, s3, s4
+ INTERPOLATE_PIXEL_255 t3, t5, t1, t7, s0, t9, t8, s1, s2, s3, s4
+
+ addiu a1, 8
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ lw s3, 12(sp)
+ lw s4, 16(sp)
+ lw s5, 20(sp)
+ addiu sp, 24
+ jr ra
+ nop
+ .set at
+
+END(comp_func_DestinationAtop_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_XOR_dsp_asm_x2)
+/*
+ * Two pixels per iteration:
+ *   dest = INTERPOLATE_PIXEL_255(color, ~dest >> 24, dest, sia)
+ * INTERPOLATE_PIXEL_255 is a macro from qt_mips_asm_dsp.h (not visible
+ * here).
+ *
+ * a0 - uint *dest
+ * a1 - int length
+ * a2 - uint color
+ * a3 - uint sia
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0 is saved; the frame is 8 bytes (not 4) so that sp stays doubleword
+ * aligned as the o32 ABI requires. AT is used as a data register, hence
+ * .set noat.
+ */
+
+ .set noat
+ addiu sp, sp, -8 /* 1 saved word + 4 bytes of alignment padding */
+ sw s0, 0(sp)
+ beqz a1, 2f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+1:
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addiu a1, -2
+ not t2, t0
+ not t3, t1
+ srl t2, t2, 24 /* t2 = qAlpha(~(dest 1)) */
+ srl t3, t3, 24 /* t3 = qAlpha(~(dest 2)) */
+
+ INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7
+ INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7
+
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8 /* delay slot: advance dest */
+2:
+ lw s0, 0(sp)
+ addiu sp, sp, 8
+ jr ra
+ nop
+ .set at
+
+END(comp_func_solid_XOR_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_XOR_dsp_asm_x2)
+/*
+ * Two pixels per iteration:
+ *   dest = INTERPOLATE_PIXEL_255(s, ~dest >> 24, dest, ~s >> 24)
+ * where s is the source word, first run through
+ * BYTE_MUL_x2(src, const_alpha) when const_alpha != 255.
+ * The macros come from qt_mips_asm_dsp.h (not visible here).
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0-s4 are saved; the frame is 24 bytes (not 20) so that sp stays
+ * doubleword aligned as the o32 ABI requires. AT is used as a data
+ * register, hence .set noat.
+ */
+
+ .set noat
+ addiu sp, sp, -24 /* 5 saved words + 4 bytes of alignment padding */
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ sw s3, 12(sp)
+ sw s4, 16(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ replv.ph a3, a3
+11:
+ lw AT, 0(a1) /* src 1 */
+ lw s0, 4(a1) /* src 2 */
+
+ BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
+ /* t0 = s1 */
+ /* t1 = s2 */
+
+ lw t2, 0(a0) /* t2 = dest 1 */
+ lw t3, 4(a0) /* t3 = dest 2 */
+
+ not t4, t2
+ not t5, t3
+ srl t4, t4, 24 /* t4 = qAlpha(~(dest 1)) */
+ srl t5, t5, 24
+ not t6, t0
+ not t7, t1
+ srl t6, t6, 24 /* t6 = qAlpha(~s) */
+ srl t7, t7, 24
+ addiu a2, -2
+
+ INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
+ INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4
+
+ addiu a1, 8
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a0) /* d1 */
+ lw t3, 4(a0) /* d2 */
+ lw t0, 0(a1) /* s1 */
+ lw t1, 4(a1) /* s2 */
+ not t4, t0
+ not t5, t1
+ srl t4, t4, 24 /* t4 = qAlpha(~s1) */
+ srl t5, t5, 24
+ not t6, t2
+ not t7, t3
+ srl t6, t6, 24 /* qAlpha(~d1) */
+ srl t7, t7, 24
+ addiu a2, -2
+
+ INTERPOLATE_PIXEL_255 t0, t6, t2, t4, AT, t9, t8, s1, s2, s3, s4
+ INTERPOLATE_PIXEL_255 t1, t7, t3, t5, s0, t9, t8, s1, s2, s3, s4
+
+ addiu a1, 8
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ lw s3, 12(sp)
+ lw s4, 16(sp)
+ addiu sp, sp, 24
+ jr ra
+ nop
+ .set at
+
+END(comp_func_XOR_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_SourceOut_dsp_asm_x2)
+/*
+ * Two pixels per iteration.
+ *   const_alpha == 255: dest = BYTE_MUL_x2(color, ~dest >> 24)
+ *   otherwise:          color = BYTE_MUL(color, const_alpha) once, then
+ *                       dest = INTERPOLATE_PIXEL_255(color, ~dest >> 24,
+ *                                                    dest, 255 - const_alpha)
+ * The three macros come from qt_mips_asm_dsp.h (not visible here).
+ *
+ * a0 - uint *dest
+ * a1 - int length
+ * a2 - uint color
+ * a3 - uint const_alpha
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0-s2 are saved; the frame is 16 bytes (not 12) so that sp stays
+ * doubleword aligned as the o32 ABI requires. AT is used as a data
+ * register, hence .set noat.
+ */
+
+ .set noat
+ addiu sp, sp, -16 /* 3 saved words + 4 bytes of alignment padding */
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ beqz a1, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ replv.ph t0, a3
+ li t5, 0xff
+ BYTE_MUL a2, a2, t0, t9, t1, t2, t3, t4 /* a2 = color ( = BYTE_MUL(color, const_alpha)); */
+ subu t1, t5, a3 /* t1 = cia = 255 - const_alpha */
+11:
+ lw t2, 0(a0) /* t2 = d1 */
+ lw s0, 4(a0) /* s0 = d2 */
+ addiu a1, -2
+ not t3, t2
+ not s2, s0
+ srl t3, t3, 24 /* t3 = qAlpha(~d1) */
+ srl s2, s2, 24 /* s2 = qAlpha(~d2) */
+
+ INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7
+ INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7
+
+ sw AT, 0(a0)
+ sw s1, 4(a0)
+ bnez a1, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t0, 0(a0) /* dest 1 */
+ lw t1, 4(a0) /* dest 2 */
+ not t4, t0
+ not t5, t1
+ srl t4, t4, 24
+ srl t5, t5, 24
+ replv.ph t2, t4
+ replv.ph t3, t5
+ addiu a1, -2
+
+ BYTE_MUL_x2 a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7
+
+ sw t8, 0(a0)
+ sw AT, 4(a0)
+ bnez a1, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ addiu sp, sp, 16
+ jr ra
+ nop
+ .set at
+
+END(comp_func_solid_SourceOut_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_SourceOut_dsp_asm_x2)
+/*
+ * Two pixels per iteration.
+ *   const_alpha == 255: dest = BYTE_MUL_x2(src, ~dest >> 24)
+ *   otherwise:          s = BYTE_MUL_x2(src, const_alpha), then
+ *                       dest = INTERPOLATE_PIXEL_255(s, ~dest >> 24,
+ *                                                    dest, 255 - const_alpha)
+ * The macros come from qt_mips_asm_dsp.h (not visible here).
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0-s3 are saved on a 16-byte frame; AT is used as a data register,
+ * hence .set noat.
+ */
+
+ .set noat
+ addiu sp, -16
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ sw s3, 12(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ li t5, 0xff
+ subu t7, t5, a3 /* t7 = cia = 255 - const_alpha */
+ replv.ph a3, a3
+11:
+ lw t0, 0(a1) /* t0 = src 1 */
+ lw t1, 4(a1) /* t1 = src 2 */
+ addiu a2, -2
+
+ BYTE_MUL_x2 t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0
+
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addiu a1, 8
+
+ not t2, t0
+ not t3, t1
+ srl t2, t2, 24 /* t2 = qAlpha(~d1) */
+ srl t3, t3, 24 /* t3 = qAlpha(~d2) */
+
+ INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3
+ INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3
+
+ sw s1, 0(a0)
+ sw s2, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a0) /* dest 1 */
+ lw t3, 4(a0) /* dest 2 */
+ lw t0, 0(a1) /* src 1 */
+ lw t1, 4(a1) /* src 2 */
+ not t4, t2
+ not t5, t3
+ srl t4, t4, 24 /* qAlpha(~d1) */
+ srl t5, t5, 24 /* qAlpha(~d2) */
+ replv.ph t2, t4
+ replv.ph t3, t5
+ addiu a2, -2
+
+ BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7
+
+ addiu a1, 8
+ sw t8, 0(a0)
+ sw AT, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ lw s3, 12(sp)
+ addiu sp, 16
+ jr ra
+ nop
+ .set at
+
+END(comp_func_SourceOut_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_Source_dsp_asm_x2)
+/*
+ * Two pixels per iteration:
+ *   dest = INTERPOLATE_PIXEL_255(src, const_alpha, dest, 255 - const_alpha)
+ * INTERPOLATE_PIXEL_255 is a macro from qt_mips_asm_dsp.h (not visible
+ * here).
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0/s1 are saved on an 8-byte frame; AT is used as a data register,
+ * hence .set noat.
+ */
+
+ .set noat
+ addiu sp, -8
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ beqz a2, 2f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+ li t7, 0xff
+ subu t7, t7, a3 /* t7 = ialpha */
+1:
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ lw t2, 0(a1) /* t2 = src 1 */
+ lw t3, 4(a1) /* t3 = src 2 */
+ addiu a2, -2
+ addiu a1, 8
+
+ INTERPOLATE_PIXEL_255 t2, a3, t0, t7, AT, t9, t8, t4, t5, t6, s1
+ INTERPOLATE_PIXEL_255 t3, a3, t1, t7, s0, t9, t8, t4, t5, t6, s1
+
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 1b
+ addiu a0, 8 /* delay slot: advance dest */
+2:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ addiu sp, 8
+ jr ra
+ nop
+ .set at
+
+END(comp_func_Source_dsp_asm_x2)
+
+LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_mips_dsp_asm_x2)
+/*
+ * Two pixels per iteration:
+ *   s    = BYTE_MUL_x2(src, const_alpha)
+ *   dest = s + BYTE_MUL_x2(dest, ~s >> 24)
+ * BYTE_MUL_x2 is a macro from qt_mips_asm_dsp.h (not visible here).
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * The loop counter is decremented by 2 and tested against zero only, so
+ * length is presumably expected to be even (TODO confirm with the caller).
+ * s0-s2 are saved; the frame is 16 bytes (not 12) so that sp stays
+ * doubleword aligned as the o32 ABI requires. AT is used as a data
+ * register, hence .set noat.
+ */
+
+ .set noat
+ addiu sp, sp, -16 /* 3 saved words + 4 bytes of alignment padding */
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ beqz a2, 2f
+ nop
+ replv.ph a3, a3 /* replicate const_alpha into both halfwords */
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+
+1:
+ lw t0, 0(a1) /* t0 = src 1 */
+ lw t1, 4(a1) /* t1 = src 2 */
+ addiu a2, -2
+
+ BYTE_MUL_x2 t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0
+
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ not s1, AT
+ not s2, t7
+ srl s1, s1, 24 /* s1 = qAlpha(~s1) */
+ srl s2, s2, 24 /* s2 = qAlpha(~s2) */
+ replv.ph s1, s1
+ replv.ph s2, s2
+
+ BYTE_MUL_x2 t0, t1, t2, t3, s1, s2, t9, t4, t5, t6, s0
+
+ addiu a1, 8
+ addu AT, AT, t2
+ addu t7, t7, t3
+ sw AT, 0(a0)
+ sw t7, 4(a0)
+ bnez a2, 1b
+ addiu a0, 8 /* delay slot: advance dest */
+
+2:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ addiu sp, sp, 16
+ jr ra
+ nop
+ .set at
+
+END(qt_blend_argb32_on_argb32_mips_dsp_asm_x2)
+
+LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)
+/*
+ * One pixel per iteration, with three cases per source word s:
+ *   top byte of ~s is zero : dst[i] = s (plain copy)
+ *   s == 0                 : dst[i] is left untouched
+ *   otherwise              : dst[i] = s + BYTE_MUL(dst[i], ~s >> 24),
+ *                            with the BYTE_MUL skipped when dst[i] == 0
+ * BYTE_MUL is a macro from qt_mips_asm_dsp.h (not visible here).
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ *
+ * Only t0-t8 are used as scratch; no stack frame.
+ * NOTE(review): the BYTE_MUL invocation below passes t4 both as its first
+ * argument and as its last one; this is only safe if the macro reads the
+ * former before writing the latter - confirm against the macro body.
+ * NOTE(review): the final "b 5f / nop" pair sits directly in front of
+ * label 5 and looks redundant.
+ */
+
+ beqz a2, 5f
+ nop
+ li t7, 8388736 /* t7 = 0x800080 */
+ b 2f
+ nop
+1:
+ addiu a0, a0, 4 /* s == 0: skip this destination pixel */
+ addiu a2, a2, -1
+ beqz a2, 5f
+ nop
+2:
+ lw t0, 0(a1) /* t0 = s = src[i] */
+ addiu a1, a1, 4
+ nor t1, t0, zero
+ srl t1, t1, 24 /* t1 = ~qAlpha(s) */
+ bnez t1, 3f
+ nop
+ sw t0, 0(a0) /* dst[i] = src[i] */
+ addiu a2, a2, -1
+ bnez a2, 2b
+ addiu a0, a0, 4
+ b 5f
+ nop
+3:
+ beqz t0, 1b
+ replv.ph t6, t1 /* | 0 | qAlpha(~s) | 0 | qAlpha(~s) | */
+
+ lw t4, 0(a0)
+ addiu a2, a2, -1
+ beqz t4, 31f
+ move t8, zero /* delay slot: t8 = 0 for the dst[i] == 0 shortcut */
+
+ BYTE_MUL t4, t8, t6, t7, t1, t2, t3, t4
+31:
+ addu t8, t0, t8 /* dst[i] =
+ * s + BYTE_MUL(dst[i],~qAlpha(s)) */
+ sw t8, 0(a0)
+ bnez a2, 2b
+ addiu a0, a0, 4
+ b 5f
+ nop
+5:
+ jr ra
+ nop
+
+END(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)