/**************************************************************************** ** ** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtGui module of the Qt Toolkit. ** ** $QT_BEGIN_LICENSE:LGPL$ ** Commercial License Usage ** Licensees holding valid commercial Qt licenses may use this file in ** accordance with the commercial license agreement provided with the ** Software or, alternatively, in accordance with the terms contained in ** a written agreement between you and The Qt Company. For licensing terms ** and conditions see https://www.qt.io/terms-conditions. For further ** information use the contact form at https://www.qt.io/contact-us. ** ** GNU Lesser General Public License Usage ** Alternatively, this file may be used under the terms of the GNU Lesser ** General Public License version 3 as published by the Free Software ** Foundation and appearing in the file LICENSE.LGPL3 included in the ** packaging of this file. Please review the following information to ** ensure the GNU Lesser General Public License version 3 requirements ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. ** ** GNU General Public License Usage ** Alternatively, this file may be used under the terms of the GNU ** General Public License version 2.0 or (at your option) the GNU General ** Public license version 3 or any later version approved by the KDE Free ** Qt Foundation. The licenses are as published by the Free Software ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 ** included in the packaging of this file. Please review the following ** information to ensure the GNU General Public License requirements will ** be met: https://www.gnu.org/licenses/gpl-2.0.html and ** https://www.gnu.org/licenses/gpl-3.0.html. 
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "qt_mips_asm_dsp_p.h"

/*
 * General notes for the routines below.
 *
 *  - LEAF_MIPS_DSP / LEAF_MIPS_DSPR2 / END, the register aliases and the
 *    BYTE_MUL_x2 macro come from qt_mips_asm_dsp_p.h (not visible here).
 *    The code is written with explicit branch delay slots, so it presumably
 *    relies on that header selecting ".set noreorder" -- confirm there.
 *    Delay-slot instructions are indented by one extra space.
 *  - 0x800080 is the per-halfword rounding constant used by the
 *    x*a -> (x*a + (x*a >> 8) + 0x80) >> 8 sequences ("BYTE_MUL").
 */

/*
 * destfetchARGB32_asm_mips_dsp
 *
 * a0 - buffer address (dst)
 * a1 - data address (src)
 * a2 - length (pixels)
 * v0 - returns the original buffer address
 *
 * For every pixel the R, G and B bytes are scaled by the pixel's own alpha
 * byte (rounded BYTE_MUL); the alpha byte itself is stored unchanged.
 * An odd leading pixel is handled on its own, the rest two at a time.
 */
LEAF_MIPS_DSP(destfetchARGB32_asm_mips_dsp)
    beqz    a2, 2f
     move   v0, a0              /* delay slot: return value = buffer address */
    andi    t1, a2, 0x1
    li      t7, 0x800080        /* t7 = rounding factor */
    beqz    t1, 1f              /* even length: go straight to the x2 loop */
     nop

    /* odd length: process one pixel first */
    lw      t8, 0(a1)
    addiu   a2, a2, -1
    srl     t6, t8, 24          /* t6 = alpha */
    preceu.ph.qbra t0, t8       /* t0 = | 0 | R | 0 | B | */
    mul     t1, t0, t6
    preceu.ph.qbla t4, t8       /* t4 = | 0 | A | 0 | G | */
    mul     t5, t4, t6
    preceu.ph.qbla t2, t1
    addq.ph t3, t1, t2
    addq.ph t3, t3, t7
    preceu.ph.qbla t1, t3       /* t1 holds R & B blended with alpha
                                 * | 0 | dRab | 0 | dBab | */
    preceu.ph.qbla t2, t5
    addq.ph t3, t2, t5
    addq.ph t4, t3, t7
    preceu.ph.qbla t2, t4       /* t2 holds A & G blended with alpha
                                 * | 0 | dAab | 0 | dGab | */
    andi    t2, t2, 255         /* keep only the low byte (dGab) */
    sll     t0, t6, 24          /* original alpha back into bits 31..24 */
    sll     t3, t2, 8
    or      t4, t0, t3
    or      t0, t1, t4
    sw      t0, 0(a0)
    addiu   a0, a0, 4
    addiu   a1, a1, 4
    beqz    a2, 2f              /* there was only one member */
     nop

    /* two pixels per iteration */
1:
    lw      t0, 0(a1)           /* t0 = src1 */
    lw      t1, 4(a1)           /* t1 = src2 */
    precrq.qb.ph t4, t0, t1     /* t4 = | a1 | G1 | a2 | G2 | */
    preceu.ph.qbra t3, t4       /* t3 = | 0 | G1 | 0 | G2 | */
    preceu.ph.qbla t2, t4       /* t2 = | 0 | a1 | 0 | a2 | */
    srl     t5, t2, 8
    or      t8, t2, t5          /* t8 = | 0 | a1 | a1 | a2 | */
    muleu_s.ph.qbr t5, t8, t3   /* G1*a1, G2*a2 */
    addiu   a2, a2, -2
    addiu   a1, a1, 8
    precrq.ph.w t9, t0, t1
    preceu.ph.qbra t9, t9       /* t9 = | 0 | R1 | 0 | R2 | */
    preceu.ph.qbla t6, t5
    addq.ph t5, t5, t6
    addq.ph t2, t5, t7
    muleu_s.ph.qbr t6, t8, t9   /* R1*a1, R2*a2 */
    sll     t3, t1, 16
    packrl.ph t3, t0, t3
    preceu.ph.qbra t3, t3       /* t3 = | 0 | B1 | 0 | B2 | */
    muleu_s.ph.qbr t8, t8, t3   /* B1*a1, B2*a2 */
    preceu.ph.qbla t3, t6
    addq.ph t3, t6, t3
    addq.ph t3, t3, t7
    preceu.ph.qbla t5, t8
    addq.ph t5, t8, t5
    addq.ph t5, t5, t7
    precrq.ph.w t0, t4, t3      /* t0 = | 0 | a1 | 0 | dR1 | */
    precrq.ph.w t1, t2, t5      /* t1 = | 0 | dG1 | 0 | dB1 | */
    precrq.qb.ph t6, t0, t1     /* t6 = | a1 | dR1 | dG1 | dB1 | */
    sll     t3, t3, 16
    sll     t5, t5, 16
    packrl.ph t0, t4, t3
    packrl.ph t1, t2, t5
    precrq.qb.ph t8, t0, t1     /* t8 = | a2 | dR2 | dG2 | dB2 | */
    sw      t6, 0(a0)
    sw      t8, 4(a0)
    bnez    a2, 1b
     addiu  a0, a0, 8
2:
    jr      ra
     nop
END(destfetchARGB32_asm_mips_dsp)


/*
 * qt_memfill32_asm_mips_dsp
 *
 * a0 - destination address (dst)
 * a1 - value
 * a2 - count (number of 32-bit words)
 *
 * Stores (count % 8) words one at a time, then the remaining multiple of 8
 * in an unrolled loop. The last group of 8 is emitted by the tail at
 * label 4, so the loop at label 3 runs (count / 8 - 1) times.
 */
LEAF_MIPS_DSP(qt_memfill32_asm_mips_dsp)
    beqz    a2, 5f
     nop
    li      t8, 8
    andi    t0, a2, 0x7         /* t0 = count % 8 */
    beqzl   t0, 2f              /* count is multiple of 8 (8, 16, 24, ....) */
     addiu  a2, a2, -8          /* (branch-likely: runs only when taken) */
    subu    a2, a2, t0          /* a2 = count rounded down to a multiple of 8 */
1:
    sw      a1, 0(a0)
    addiu   t0, t0, -1
    bnez    t0, 1b
     addiu  a0, a0, 4
    bgeu    a2, t8, 2f          /* at least one full group of 8 left? */
     addiu  a2, a2, -8
    b       5f
     nop
2:
    beqz    a2, 4f              /* exactly one group left: tail only */
     nop
3:
    pref    30, 32(a0)
    addiu   a2, a2, -8
    sw      a1, 0(a0)
    sw      a1, 4(a0)
    sw      a1, 8(a0)
    sw      a1, 12(a0)
    addiu   a0, a0, 32
    sw      a1, -16(a0)
    sw      a1, -12(a0)
    sw      a1, -8(a0)
    bnez    a2, 3b
     sw     a1, -4(a0)
4:
    sw      a1, 0(a0)
    sw      a1, 4(a0)
    sw      a1, 8(a0)
    sw      a1, 12(a0)
    addiu   a0, a0, 32
    sw      a1, -16(a0)
    sw      a1, -12(a0)
    sw      a1, -8(a0)
    sw      a1, -4(a0)
5:
    jr      ra
     nop
END(qt_memfill32_asm_mips_dsp)


/*
 * comp_func_SourceOver_asm_mips_dsp
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * const_alpha == 255:  dst = s + BYTE_MUL(dst, ~qAlpha(s)), with shortcuts
 *                      for opaque (copy) and all-zero (skip) source pixels.
 * const_alpha != 255:  s' = BYTE_MUL(s, const_alpha);
 *                      dst = s' + BYTE_MUL(dst, ~qAlpha(s')).
 * One pixel per iteration.
 */
LEAF_MIPS_DSP(comp_func_SourceOver_asm_mips_dsp)
    beqz    a2, 5f
     nop
    li      t8, 0xff
    li      t7, 0x800080        /* t7 = rounding factor */
    bne     a3, t8, 4f
     nop

    /* part where const_alpha = 255 */
    b       2f
     nop
1:                              /* source pixel is 0: leave dst untouched */
    addiu   a0, a0, 4
    addiu   a2, a2, -1
    beqz    a2, 5f
     nop
2:
    lw      t0, 0(a1)           /* t0 = s = src[i] */
    addiu   a1, a1, 4
    nor     t1, t0, zero
    srl     t1, t1, 24          /* t1 = ~qAlpha(s) */
    bnez    t1, 3f
     nop
    sw      t0, 0(a0)           /* opaque source: dst[i] = src[i] */
    addiu   a2, a2, -1
    bnez    a2, 2b
     addiu  a0, a0, 4
    b       5f
     nop
3:
    beqz    t0, 1b
     nop
    lw      t4, 0(a0)
    replv.ph t6, t1             /* t6 = | 0 | ~qAlpha(s) | 0 | ~qAlpha(s) | */
    muleu_s.ph.qbl t2, t4, t6
    muleu_s.ph.qbr t3, t4, t6
    addiu   a2, a2, -1
    preceu.ph.qbla t4, t2
    addq.ph t4, t2, t4
    addq.ph t4, t4, t7
    preceu.ph.qbla t5, t3
    addq.ph t5, t5, t3
    addq.ph t5, t5, t7
    precrq.qb.ph t8, t4, t5     /* t8 = | dsA | dsR | dsG | dsB | */
    addu    t8, t0, t8          /* dst[i] = s + BYTE_MUL(dst[i], ~qAlpha(s)) */
    sw      t8, 0(a0)
    bnez    a2, 2b
     addiu  a0, a0, 4
    b       5f
     nop

    /* part where const_alpha != 255 */
4:
    lw      t0, 0(a0)           /* t0 - dst[i] "1" */
    lw      t1, 0(a1)           /* t1 - src[i] "2" */
    addiu   a1, a1, 4
    addiu   a2, a2, -1
    replv.ph t6, a3             /* t6 = | 0 | const_alpha | 0 | const_alpha | */
    muleu_s.ph.qbl t2, t1, t6
    muleu_s.ph.qbr t3, t1, t6
    preceu.ph.qbla t4, t2
    addq.ph t4, t2, t4
    addq.ph t4, t4, t7
    preceu.ph.qbla t5, t3
    addq.ph t5, t5, t3
    addq.ph t5, t5, t7
    precrq.qb.ph t8, t4, t5     /* t8 = | dsA | dsR | dsG | dsB | */
    nor     t6, t8, zero
    srl     t6, t6, 24
    replv.ph t6, t6             /* t6 = | 0 | ~dsA | 0 | ~dsA | */
    muleu_s.ph.qbl t2, t0, t6
    muleu_s.ph.qbr t3, t0, t6
    preceu.ph.qbla t4, t2
    addq.ph t4, t2, t4
    addq.ph t4, t4, t7
    preceu.ph.qbla t5, t3
    addq.ph t5, t5, t3
    addq.ph t5, t5, t7
    precrq.qb.ph t6, t4, t5     /* t6 = | ddA | ddR | ddG | ddB | */
    addu    t0, t8, t6
    sw      t0, 0(a0)
    bnez    a2, 4b
     addiu  a0, a0, 4
5:
    jr      ra
     nop
END(comp_func_SourceOver_asm_mips_dsp)


/*
 * qt_destStoreARGB32_asm_mips_dsp
 *
 * a0 - uint * data
 * a1 - const uint *buffer
 * a2 - int length
 *
 * Copies buffer to data one pixel at a time (v1 counts pixels done):
 *   alpha == 255 -> pixel stored unchanged
 *   alpha == 0   -> 0 stored
 *   otherwise    -> each colour byte is multiplied by (0xff0000 / alpha)
 *                   and shifted back into place; alpha is kept.
 */
LEAF_MIPS_DSPR2(qt_destStoreARGB32_asm_mips_dsp)
    blez    a2, 6f
     move   v1, zero            /* v1 = i = 0 */
    li      t0, 255
    lui     a3, 0xff            /* a3 = 0x00ff0000 */
    j       2f
     lui    t2, 0xff00          /* t2 = 0xff000000 (alpha mask) */
1:                              /* alpha == 0: store 0 */
    addiu   v1, v1, 1
    sw      zero, 0(a0)
    addiu   a1, a1, 4
    beq     v1, a2, 6f
     addiu  a0, a0, 4
2:
    lw      v0, 0(a1)
    srl     t3, v0, 0x18        /* t3 = alpha */
    beql    t3, t0, 5f          /* alpha == 255: store as is */
     addiu  v1, v1, 1           /* (branch-likely: runs only when taken) */
    beqz    t3, 1b
     srl    t1, v0, 0x8
    andi    t1, t1, 0xff        /* t1 = G */
    teq     t3, zero, 0x7       /* trap on divide by zero (alpha != 0 here) */
    div     zero, a3, t3        /* LO = 0xff0000 / alpha */
    move    t8, t3
    andi    t6, v0, 0xff        /* t6 = B */
    srl     t3, v0, 0x10
    andi    t3, t3, 0xff        /* t3 = R */
    and     t5, v0, t2          /* t5 = alpha, still in bits 31..24 */
    mflo    t4                  /* t4 = 0xff0000 / alpha */
    mult    $ac0, t4, t6        /* ac0 = B * t4 */
    mult    $ac1, t1, t4        /* ac1 = G * t4 */
    mul     t4, t3, t4          /* t4  = R * t4 */
    sltiu   t8, t8, 2
    beqz    t8, 3f              /* alpha >= 2: extract with extr.w */
     nop
    mflo    t6, $ac0            /* alpha == 1: low words + arithmetic shift */
    mflo    t1, $ac1
    sra     t6, t6, 0x10
    sra     t1, t1, 0x8
    b       4f
     nop
3:
    extr.w  t6, $ac0, 0x10
    extr.w  t1, $ac1, 0x8
4:
    and     v0, t4, a3          /* R into bits 23..16 */
    or      v0, v0, t6          /* | B */
    or      v0, v0, t5          /* | alpha */
    andi    t1, t1, 0xff00
    or      v0, v0, t1          /* | G */
    addiu   v1, v1, 1
5:
    sw      v0, 0(a0)
    addiu   a1, a1, 4
    bne     v1, a2, 2b
     addiu  a0, a0, 4
6:
    jr      ra
     nop
END(qt_destStoreARGB32_asm_mips_dsp)


/*
 * comp_func_solid_Source_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint color
 * a3 - uint ialpha
 *
 * dest = color + BYTE_MUL(dest, ialpha), two pixels per iteration.
 * The loop subtracts 2 from length and exits only on exactly 0, so the
 * caller has to pass an even length.
 */
LEAF_MIPS_DSP(comp_func_solid_Source_dsp_asm_x2)
    beqz    a1, 2f
     nop
    replv.ph a3, a3             /* a3 = | 0 | ialpha | 0 | ialpha | */
    li      t9, 0x800080        /* t9 = rounding factor */
1:
    lw      t0, 0(a0)
    lw      t1, 4(a0)
    or      t2, t0, t1          /* if both dest are zero, no computation needed */
    beqz    t2, 12f
     addiu  a1, -2
    BYTE_MUL_x2 t0, t1, t6, t7, a3, a3, t9, t2, t3, t4, t5, 0
11:
    addu    t2, a2, t6
    addu    t3, a2, t7
    sw      t2, 0(a0)
    sw      t3, 4(a0)
    bnez    a1, 1b
     addiu  a0, 8
    b       2f
     nop                        /* explicit delay slot (was the addu at 12:) */
12:
    addu    t2, a2, t0
    addu    t3, a2, t1
    sw      t2, 0(a0)
    sw      t3, 4(a0)
    bnez    a1, 1b
     addiu  a0, 8
2:
    jr      ra
     nop
END(comp_func_solid_Source_dsp_asm_x2)


/*
 * comp_func_solid_DestinationOver_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint color
 *
 * dest = dest + BYTE_MUL(color, qAlpha(~dest)), two pixels per iteration
 * (even length required, see loop counter). Returns immediately when
 * length or color is 0. Only caller-saved registers are used (t0-t9, a3),
 * so no stack frame is needed.
 */
LEAF_MIPS_DSP(comp_func_solid_DestinationOver_dsp_asm_x2)
    beqz    a1, 2f
     nop
    beqz    a2, 2f
     nop
    li      t9, 0x800080        /* t9 = rounding factor */
1:
    lw      t0, 0(a0)
    lw      t1, 4(a0)
    not     t2, t0
    not     t3, t1
    srl     t4, t2, 24          /* t4 = qAlpha(~d1) */
    srl     t5, t3, 24          /* t5 = qAlpha(~d2) */
    or      t2, t4, t5          /* both dest pixels opaque: nothing to add */
    beqz    t2, 11f
     addiu  a1, -2
    replv.ph t2, t4
    replv.ph t3, t5
    BYTE_MUL_x2 a2, a2, t8, a3, t2, t3, t9, t4, t5, t6, t7
    addu    t0, t0, t8
    addu    t1, t1, a3
11:
    sw      t0, 0(a0)
    sw      t1, 4(a0)
    bnez    a1, 1b
     addiu  a0, 8
2:
    jr      ra
     nop
END(comp_func_solid_DestinationOver_dsp_asm_x2)


/*
 * comp_func_DestinationOver_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * dest = dest + BYTE_MUL(s, qAlpha(~dest)), where s is src scaled by
 * const_alpha when const_alpha != 255. Two pixels per iteration (even
 * length required). Uses AT as a scratch register, hence ".set noat".
 */
LEAF_MIPS_DSP(comp_func_DestinationOver_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -8
    sw      s0, 0(sp)
    sw      s1, 4(sp)
    beqz    a2, 3f
     nop
    li      t9, 0x800080        /* t9 = rounding factor */
    li      t0, 0xff
    beq     a3, t0, 2f
     nop

    /* part where const_alpha != 255 */
1:
    replv.ph a3, a3
11:
    lw      t0, 0(a1)           /* src_1 */
    lw      t1, 4(a1)           /* src_2 */
    addiu   a2, -2
    BYTE_MUL_x2 t0, t1, t8, AT, a3, a3, t9, t4, t5, t6, t7, 0
                                /* t8 = s1, AT = s2 */
    lw      t0, 0(a0)           /* dest_1 */
    lw      t1, 4(a0)           /* dest_2 */
    addiu   a1, 8
    not     t2, t0
    not     t3, t1
    srl     t4, t2, 24
    srl     t5, t3, 24
    replv.ph t2, t4             /* qAlpha(~d) 1 */
    replv.ph t3, t5             /* qAlpha(~d) 2 */
    BYTE_MUL_x2 t8, AT, s0, s1, t2, t3, t9, t4, t5, t6, t7
    addu    t0, t0, s0
    addu    t1, t1, s1
    sw      t0, 0(a0)
    sw      t1, 4(a0)
    bnez    a2, 11b
     addiu  a0, 8
    b       3f
     nop

    /* part where const_alpha = 255 */
2:
    lw      t0, 0(a0)           /* dest 1 */
    lw      t1, 4(a0)           /* dest 2 */
    lw      s0, 0(a1)           /* src 1 */
    lw      s1, 4(a1)           /* src 2 */
    not     t2, t0
    not     t3, t1
    srl     t4, t2, 24
    srl     t5, t3, 24
    replv.ph t2, t4             /* qAlpha(~d) 1 */
    replv.ph t3, t5             /* qAlpha(~d) 2 */
    addiu   a1, 8
    addiu   a2, -2
    BYTE_MUL_x2 s0, s1, t8, AT, t2, t3, t9, t4, t5, t6, t7
    addu    t0, t0, t8
    addu    t1, t1, AT
    sw      t0, 0(a0)
    sw      t1, 4(a0)
    bnez    a2, 2b
     addiu  a0, 8
3:
    lw      s0, 0(sp)
    lw      s1, 4(sp)
    addiu   sp, sp, 8
    jr      ra
     nop
    .set at
END(comp_func_DestinationOver_dsp_asm_x2)


/*
 * General notes for the *_x2 routines below.
 *
 *  - BYTE_MUL, BYTE_MUL_x2 and INTERPOLATE_PIXEL_255 are macros from
 *    qt_mips_asm_dsp_p.h (not visible here); the formulas in the headers
 *    simply restate the operands in the order they are passed.
 *  - Every *_x2 loop subtracts 2 from the length and exits only when it
 *    reaches exactly 0, so callers have to pass an even, positive length
 *    (length 0 returns immediately).
 *  - t9 = 0x800080 is the rounding factor, t8 = 0xff00ff00 the "andi
 *    factor" handed to INTERPOLATE_PIXEL_255.
 *  - Routines that use AT as scratch bracket their body with .set noat/at.
 *  - Stack frames are kept a multiple of 8 bytes (o32 stack alignment);
 *    slots beyond the saved registers are padding.
 *  - Delay-slot instructions are indented by one extra space.
 */

/*
 * comp_func_solid_SourceIn_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint color
 * a3 - uint const_alpha
 *
 * const_alpha == 255: dest = BYTE_MUL(color, qAlpha(dest))
 * otherwise:          color = BYTE_MUL(color, const_alpha);
 *                     dest = INTERPOLATE_PIXEL_255(color, qAlpha(d), d, cia)
 */
LEAF_MIPS_DSP(comp_func_solid_SourceIn_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -16         /* 12 bytes used, padded to 8-byte multiple */
    sw      s0, 0(sp)
    sw      s1, 4(sp)
    sw      s2, 8(sp)
    beqz    a1, 3f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui     t8, 0xff00
    li      t0, 0xff
    beq     a3, t0, 2f
     ori    t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

    /* part where const_alpha != 255 */
1:
    replv.ph t0, a3
    li      t5, 0xff
    BYTE_MUL a2, a2, t0, t9, t1, t2, t3, t4
                                /* a2 = color ( = BYTE_MUL(color, const_alpha)); */
    subu    t1, t5, a3          /* t1 = cia = 255 - const_alpha */
11:
    lw      t2, 0(a0)           /* t2 = d1 */
    lw      s0, 4(a0)           /* s0 = d2 */
    addiu   a1, -2
    srl     t3, t2, 24          /* t3 = qAlpha(d1) */
    srl     s2, s0, 24          /* s2 = qAlpha(d2) */
    INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7
    sw      AT, 0(a0)
    sw      s1, 4(a0)
    bnez    a1, 11b
     addiu  a0, 8
    b       3f
     nop

    /* part where const_alpha = 255 */
2:
    lw      t0, 0(a0)           /* dest 1 */
    lw      t1, 4(a0)           /* dest 2 */
    srl     t4, t0, 24
    srl     t5, t1, 24
    replv.ph t2, t4             /* qAlpha(d1) */
    replv.ph t3, t5             /* qAlpha(d2) */
    addiu   a1, -2
    BYTE_MUL_x2 a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7
    sw      t8, 0(a0)
    sw      AT, 4(a0)
    bnez    a1, 2b
     addiu  a0, 8
3:
    lw      s0, 0(sp)
    lw      s1, 4(sp)
    lw      s2, 8(sp)
    addiu   sp, sp, 16
    jr      ra
     nop
    .set at
END(comp_func_solid_SourceIn_dsp_asm_x2)


/*
 * comp_func_SourceIn_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * const_alpha == 255: dest = BYTE_MUL(src, qAlpha(dest))
 * otherwise:          s = BYTE_MUL(src, const_alpha);
 *                     dest = INTERPOLATE_PIXEL_255(s, qAlpha(d), d, cia)
 */
LEAF_MIPS_DSP(comp_func_SourceIn_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -16
    sw      s0, 0(sp)
    sw      s1, 4(sp)
    sw      s2, 8(sp)
    sw      s3, 12(sp)
    beqz    a2, 3f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui     t8, 0xff00
    li      t0, 0xff
    beq     a3, t0, 2f
     ori    t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

    /* part where const_alpha != 255 */
1:
    li      t5, 0xff
    subu    t7, t5, a3          /* t7 = cia = 255 - const_alpha */
    replv.ph a3, a3
11:
    lw      t0, 0(a1)           /* t0 = src 1 */
    lw      t1, 4(a1)           /* t1 = src 2 */
    addiu   a2, -2
    BYTE_MUL_x2 t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0
    lw      t0, 0(a0)           /* t0 = dest 1 */
    lw      t1, 4(a0)           /* t1 = dest 2 */
    addiu   a1, 8
    srl     t2, t0, 24          /* t2 = qAlpha(d) 1 */
    srl     t3, t1, 24          /* t3 = qAlpha(d) 2 */
    INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3
    INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3
    sw      s1, 0(a0)
    sw      s2, 4(a0)
    bnez    a2, 11b
     addiu  a0, 8
    b       3f
     nop

    /* part where const_alpha = 255 */
2:
    lw      t2, 0(a0)           /* dest 1 */
    lw      t3, 4(a0)           /* dest 2 */
    lw      t0, 0(a1)           /* src 1 */
    lw      t1, 4(a1)           /* src 2 */
    srl     t4, t2, 24
    srl     t5, t3, 24
    replv.ph t2, t4             /* qAlpha(d1) */
    replv.ph t3, t5             /* qAlpha(d2) */
    addiu   a2, -2
    BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7
    addiu   a1, 8
    sw      t8, 0(a0)
    sw      AT, 4(a0)
    bnez    a2, 2b
     addiu  a0, 8
3:
    lw      s0, 0(sp)
    lw      s1, 4(sp)
    lw      s2, 8(sp)
    lw      s3, 12(sp)
    addiu   sp, sp, 16
    jr      ra
     nop
    .set at
END(comp_func_SourceIn_dsp_asm_x2)


/*
 * comp_func_solid_DestinationIn_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint a
 *
 * dest = BYTE_MUL(dest, a)
 */
LEAF_MIPS_DSP(comp_func_solid_DestinationIn_dsp_asm_x2)
    .set noat
    beqz    a1, 2f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    replv.ph a2, a2
1:
    lw      t0, 0(a0)
    lw      t1, 4(a0)
    addiu   a1, -2
    BYTE_MUL_x2 t0, t1, t8, AT, a2, a2, t9, t4, t5, t6, t7, 0
    sw      t8, 0(a0)
    sw      AT, 4(a0)
    bnez    a1, 1b
     addiu  a0, 8
2:
    jr      ra
     nop
    .set at
END(comp_func_solid_DestinationIn_dsp_asm_x2)


/*
 * comp_func_DestinationIn_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * const_alpha == 255: dest = BYTE_MUL(dest, qAlpha(src))
 * otherwise:          a = BYTE_MUL(qAlpha(src), const_alpha) + cia;
 *                     dest = BYTE_MUL(dest, a)
 */
LEAF_MIPS_DSP(comp_func_DestinationIn_dsp_asm_x2)
    addiu   sp, sp, -8
    sw      s0, 0(sp)
    sw      s1, 4(sp)
    beqz    a2, 3f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    li      t0, 0xff
    beq     a3, t0, 2f
     nop

    /* part where const_alpha != 255 */
1:
    li      t5, 0xff
    subu    t8, t5, a3          /* t8 = cia = 255 - const_alpha */
    replv.ph a3, a3
11:
    lw      t0, 0(a1)           /* t0 = src 1 */
    lw      t1, 4(a1)           /* t1 = src 2 */
    addiu   a2, -2
    srl     t0, t0, 24          /* t0 = qAlpha(src 1) */
    srl     t1, t1, 24          /* t1 = qAlpha(src 2) */
    BYTE_MUL_x2 t0, t1, s1, t7, a3, a3, t9, t3, t4, t5, t6, 0
    lw      t0, 0(a0)           /* t0 = dest 1 */
    lw      t1, 4(a0)           /* t1 = dest 2 */
    addu    s1, s1, t8          /* a 1 */
    addu    t7, t7, t8          /* a 2 */
    replv.ph t2, s1
    replv.ph t3, t7
    BYTE_MUL_x2 t0, t1, s1, t7, t2, t3, t9, t4, t5, t6, s0
    addiu   a1, 8
    sw      s1, 0(a0)
    sw      t7, 4(a0)
    bnez    a2, 11b
     addiu  a0, 8
    b       3f
     nop

    /* part where const_alpha = 255 */
2:
    lw      t2, 0(a1)           /* src 1 */
    lw      t3, 4(a1)           /* src 2 */
    lw      t0, 0(a0)           /* dest 1 */
    lw      t1, 4(a0)           /* dest 2 */
    srl     t4, t2, 24
    srl     t5, t3, 24
    replv.ph t2, t4             /* t2 = qAlpha(src 1) */
    replv.ph t3, t5             /* t3 = qAlpha(src 2) */
    addiu   a2, -2
    BYTE_MUL_x2 t0, t1, t8, s1, t2, t3, t9, t4, t5, t6, t7
    addiu   a1, 8
    sw      t8, 0(a0)
    sw      s1, 4(a0)
    bnez    a2, 2b
     addiu  a0, 8
3:
    lw      s0, 0(sp)
    lw      s1, 4(sp)
    addiu   sp, sp, 8
    jr      ra
     nop
END(comp_func_DestinationIn_dsp_asm_x2)


/*
 * comp_func_DestinationOut_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * const_alpha == 255: dest = BYTE_MUL(dest, qAlpha(~src))
 * otherwise:          a = BYTE_MUL(qAlpha(~src), const_alpha) + cia;
 *                     dest = BYTE_MUL(dest, a)
 */
LEAF_MIPS_DSP(comp_func_DestinationOut_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -8          /* 4 bytes used, padded to 8-byte multiple */
    sw      s0, 0(sp)
    beqz    a2, 3f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    li      t0, 0xff
    beq     a3, t0, 2f
     nop

    /* part where const_alpha != 255 */
1:
    li      t5, 0xff
    subu    t8, t5, a3          /* t8 = cia = 255 - const_alpha */
    replv.ph a3, a3
11:
    lw      t0, 0(a1)           /* t0 = src 1 */
    lw      t1, 4(a1)           /* t1 = src 2 */
    not     t0, t0
    not     t1, t1
    addiu   a2, -2
    srl     t0, t0, 24          /* t0 = qAlpha(~src 1) */
    srl     t1, t1, 24          /* t1 = qAlpha(~src 2) */
    BYTE_MUL_x2 t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0
    lw      t0, 0(a0)           /* t0 = dest 1 */
    lw      t1, 4(a0)           /* t1 = dest 2 */
    addu    AT, AT, t8          /* a 1 */
    addu    t7, t7, t8          /* a 2 */
    replv.ph t2, AT
    replv.ph t3, t7
    BYTE_MUL_x2 t0, t1, AT, t7, t2, t3, t9, t4, t5, t6, s0
    addiu   a1, 8
    sw      AT, 0(a0)
    sw      t7, 4(a0)
    bnez    a2, 11b
     addiu  a0, 8
    b       3f
     nop

    /* part where const_alpha = 255 */
2:
    lw      t2, 0(a1)           /* src 1 */
    lw      t3, 4(a1)           /* src 2 */
    not     t2, t2
    not     t3, t3
    lw      t0, 0(a0)           /* dest 1 */
    lw      t1, 4(a0)           /* dest 2 */
    srl     t4, t2, 24
    srl     t5, t3, 24
    replv.ph t2, t4             /* t2 = qAlpha(~src 1) */
    replv.ph t3, t5             /* t3 = qAlpha(~src 2) */
    addiu   a2, -2
    BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7
    addiu   a1, 8
    sw      t8, 0(a0)
    sw      AT, 4(a0)
    bnez    a2, 2b
     addiu  a0, 8
3:
    lw      s0, 0(sp)
    addiu   sp, sp, 8
    jr      ra
     nop
    .set at
END(comp_func_DestinationOut_dsp_asm_x2)


/*
 * comp_func_solid_SourceAtop_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint color
 * a3 - uint sia
 *
 * dest = INTERPOLATE_PIXEL_255(color, qAlpha(d), d, sia)
 */
LEAF_MIPS_DSP(comp_func_solid_SourceAtop_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -8          /* 4 bytes used, padded to 8-byte multiple */
    sw      s0, 0(sp)
    beqz    a1, 2f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui     t8, 0xff00
    ori     t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */
1:
    lw      t0, 0(a0)           /* t0 = dest 1 */
    lw      t1, 4(a0)           /* t1 = dest 2 */
    addiu   a1, -2
    srl     t2, t0, 24          /* t2 = qAlpha(dest 1) */
    srl     t3, t1, 24          /* t3 = qAlpha(dest 2) */
    INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7
    sw      AT, 0(a0)
    sw      s0, 4(a0)
    bnez    a1, 1b
     addiu  a0, 8
2:
    lw      s0, 0(sp)
    addiu   sp, sp, 8
    jr      ra
     nop
    .set at
END(comp_func_solid_SourceAtop_dsp_asm_x2)


/*
 * comp_func_SourceAtop_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * s = src (const_alpha == 255) or BYTE_MUL(src, const_alpha);
 * dest = INTERPOLATE_PIXEL_255(s, qAlpha(d), d, qAlpha(~s))
 */
LEAF_MIPS_DSP(comp_func_SourceAtop_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -24         /* 20 bytes used, padded to 8-byte multiple */
    sw      s0, 0(sp)
    sw      s1, 4(sp)
    sw      s2, 8(sp)
    sw      s3, 12(sp)
    sw      s4, 16(sp)
    beqz    a2, 3f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui     t8, 0xff00
    li      t0, 0xff
    beq     a3, t0, 2f
     ori    t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

    /* part where const_alpha != 255 */
1:
    replv.ph a3, a3
11:
    lw      AT, 0(a1)           /* src 1 */
    lw      s0, 4(a1)           /* src 2 */
    BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
                                /* t0 = s1, t1 = s2 */
    lw      t2, 0(a0)           /* t2 = dest 1 */
    lw      t3, 4(a0)           /* t3 = dest 2 */
    srl     t4, t2, 24          /* t4 = qAlpha(dest 1) */
    srl     t5, t3, 24          /* t5 = qAlpha(dest 2) */
    not     t6, t0
    not     t7, t1
    srl     t6, t6, 24          /* t6 = qAlpha(~s1) */
    srl     t7, t7, 24          /* t7 = qAlpha(~s2) */
    addiu   a2, -2
    INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4
    addiu   a1, 8
    sw      AT, 0(a0)
    sw      s0, 4(a0)
    bnez    a2, 11b
     addiu  a0, 8
    b       3f
     nop

    /* part where const_alpha = 255 */
2:
    lw      t2, 0(a0)           /* dest 1 */
    lw      t3, 4(a0)           /* dest 2 */
    lw      t0, 0(a1)           /* src 1 */
    lw      t1, 4(a1)           /* src 2 */
    srl     t4, t2, 24          /* qAlpha(d1) */
    srl     t5, t3, 24          /* qAlpha(d2) */
    not     t6, t0
    not     t7, t1
    srl     t6, t6, 24          /* qAlpha(~s1) */
    srl     t7, t7, 24          /* qAlpha(~s2) */
    addiu   a2, -2
    INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4
    addiu   a1, 8
    sw      AT, 0(a0)
    sw      s0, 4(a0)
    bnez    a2, 2b
     addiu  a0, 8
3:
    lw      s0, 0(sp)
    lw      s1, 4(sp)
    lw      s2, 8(sp)
    lw      s3, 12(sp)
    lw      s4, 16(sp)
    addiu   sp, sp, 24
    jr      ra
     nop
    .set at
END(comp_func_SourceAtop_dsp_asm_x2)


/*
 * comp_func_solid_DestinationAtop_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint color
 * a3 - uint a
 *
 * dest = INTERPOLATE_PIXEL_255(d, a, color, qAlpha(~d))
 */
LEAF_MIPS_DSP(comp_func_solid_DestinationAtop_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -8          /* 4 bytes used, padded to 8-byte multiple */
    sw      s0, 0(sp)
    beqz    a1, 2f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui     t8, 0xff00
    ori     t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */
1:
    lw      t0, 0(a0)           /* t0 = dest 1 */
    lw      t1, 4(a0)           /* t1 = dest 2 */
    addiu   a1, -2
    not     t2, t0
    not     t3, t1
    srl     t2, t2, 24          /* t2 = qAlpha(~(dest 1)) */
    srl     t3, t3, 24          /* t3 = qAlpha(~(dest 2)) */
    INTERPOLATE_PIXEL_255 t0, a3, a2, t2, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 t1, a3, a2, t3, s0, t9, t8, t4, t5, t6, t7
    sw      AT, 0(a0)
    sw      s0, 4(a0)
    bnez    a1, 1b
     addiu  a0, 8
2:
    lw      s0, 0(sp)
    addiu   sp, sp, 8
    jr      ra
     nop
    .set at
END(comp_func_solid_DestinationAtop_dsp_asm_x2)


/*
 * comp_func_DestinationAtop_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * const_alpha == 255: dest = INTERPOLATE_PIXEL_255(d, qAlpha(s), s, qAlpha(~d))
 * otherwise:          s = BYTE_MUL(src, const_alpha); a = qAlpha(s) + cia;
 *                     dest = INTERPOLATE_PIXEL_255(d, a, s, qAlpha(~d))
 */
LEAF_MIPS_DSP(comp_func_DestinationAtop_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -24
    sw      s0, 0(sp)
    sw      s1, 4(sp)
    sw      s2, 8(sp)
    sw      s3, 12(sp)
    sw      s4, 16(sp)
    sw      s5, 20(sp)
    beqz    a2, 3f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui     t8, 0xff00
    li      t0, 0xff
    beq     a3, t0, 2f
     ori    t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

    /* part where const_alpha != 255 */
1:
    li      s5, 0xff
    subu    s5, s5, a3          /* s5 = cia = 255 - const_alpha */
    replv.ph a3, a3
11:
    lw      AT, 0(a1)           /* src 1 */
    lw      s0, 4(a1)           /* src 2 */
    BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
                                /* t0 = s1, t1 = s2 */
    lw      t2, 0(a0)           /* t2 = dest 1 */
    lw      t3, 4(a0)           /* t3 = dest 2 */
    not     t4, t2
    not     t5, t3
    srl     t4, t4, 24          /* t4 = qAlpha(~(dest 1)) */
    srl     t5, t5, 24          /* t5 = qAlpha(~(dest 2)) */
    srl     t6, t0, 24
    srl     t7, t1, 24
    addu    t6, t6, s5          /* t6 = a = qAlpha(s1) + cia */
    addu    t7, t7, s5          /* t7 = a = qAlpha(s2) + cia */
    addiu   a2, -2
    INTERPOLATE_PIXEL_255 t2, t6, t0, t4, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t3, t7, t1, t5, s0, t9, t8, s1, s2, s3, s4
    addiu   a1, 8
    sw      AT, 0(a0)
    sw      s0, 4(a0)
    bnez    a2, 11b
     addiu  a0, 8
    b       3f
     nop

    /* part where const_alpha = 255 */
2:
    lw      t2, 0(a0)           /* d1 */
    lw      t3, 4(a0)           /* d2 */
    lw      t0, 0(a1)           /* s1 */
    lw      t1, 4(a1)           /* s2 */
    srl     t4, t0, 24          /* t4 = qAlpha(s1) */
    srl     t5, t1, 24          /* t5 = qAlpha(s2) */
    not     t6, t2
    not     t7, t3
    srl     t6, t6, 24          /* qAlpha(~d1) */
    srl     t7, t7, 24          /* qAlpha(~d2) */
    addiu   a2, -2
    INTERPOLATE_PIXEL_255 t2, t4, t0, t6, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t3, t5, t1, t7, s0, t9, t8, s1, s2, s3, s4
    addiu   a1, 8
    sw      AT, 0(a0)
    sw      s0, 4(a0)
    bnez    a2, 2b
     addiu  a0, 8
3:
    lw      s0, 0(sp)
    lw      s1, 4(sp)
    lw      s2, 8(sp)
    lw      s3, 12(sp)
    lw      s4, 16(sp)
    lw      s5, 20(sp)
    addiu   sp, sp, 24
    jr      ra
     nop
    .set at
END(comp_func_DestinationAtop_dsp_asm_x2)


/*
 * comp_func_solid_XOR_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint color
 * a3 - uint sia
 *
 * dest = INTERPOLATE_PIXEL_255(color, qAlpha(~d), d, sia)
 */
LEAF_MIPS_DSP(comp_func_solid_XOR_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -8          /* 4 bytes used, padded to 8-byte multiple */
    sw      s0, 0(sp)
    beqz    a1, 2f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui     t8, 0xff00
    ori     t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */
1:
    lw      t0, 0(a0)           /* t0 = dest 1 */
    lw      t1, 4(a0)           /* t1 = dest 2 */
    addiu   a1, -2
    not     t2, t0
    not     t3, t1
    srl     t2, t2, 24          /* t2 = qAlpha(~(dest 1)) */
    srl     t3, t3, 24          /* t3 = qAlpha(~(dest 2)) */
    INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7
    sw      AT, 0(a0)
    sw      s0, 4(a0)
    bnez    a1, 1b
     addiu  a0, 8
2:
    lw      s0, 0(sp)
    addiu   sp, sp, 8
    jr      ra
     nop
    .set at
END(comp_func_solid_XOR_dsp_asm_x2)


/*
 * comp_func_XOR_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * s = src (const_alpha == 255) or BYTE_MUL(src, const_alpha);
 * dest = INTERPOLATE_PIXEL_255(s, qAlpha(~d), d, qAlpha(~s))
 */
LEAF_MIPS_DSP(comp_func_XOR_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -24         /* 20 bytes used, padded to 8-byte multiple */
    sw      s0, 0(sp)
    sw      s1, 4(sp)
    sw      s2, 8(sp)
    sw      s3, 12(sp)
    sw      s4, 16(sp)
    beqz    a2, 3f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui     t8, 0xff00
    li      t0, 0xff
    beq     a3, t0, 2f
     ori    t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

    /* part where const_alpha != 255 */
1:
    replv.ph a3, a3
11:
    lw      AT, 0(a1)           /* src 1 */
    lw      s0, 4(a1)           /* src 2 */
    BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
                                /* t0 = s1, t1 = s2 */
    lw      t2, 0(a0)           /* t2 = dest 1 */
    lw      t3, 4(a0)           /* t3 = dest 2 */
    not     t4, t2
    not     t5, t3
    srl     t4, t4, 24          /* t4 = qAlpha(~(dest 1)) */
    srl     t5, t5, 24          /* t5 = qAlpha(~(dest 2)) */
    not     t6, t0
    not     t7, t1
    srl     t6, t6, 24          /* t6 = qAlpha(~s1) */
    srl     t7, t7, 24          /* t7 = qAlpha(~s2) */
    addiu   a2, -2
    INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4
    addiu   a1, 8
    sw      AT, 0(a0)
    sw      s0, 4(a0)
    bnez    a2, 11b
     addiu  a0, 8
    b       3f
     nop

    /* part where const_alpha = 255 */
2:
    lw      t2, 0(a0)           /* d1 */
    lw      t3, 4(a0)           /* d2 */
    lw      t0, 0(a1)           /* s1 */
    lw      t1, 4(a1)           /* s2 */
    not     t4, t0
    not     t5, t1
    srl     t4, t4, 24          /* t4 = qAlpha(~s1) */
    srl     t5, t5, 24          /* t5 = qAlpha(~s2) */
    not     t6, t2
    not     t7, t3
    srl     t6, t6, 24          /* qAlpha(~d1) */
    srl     t7, t7, 24          /* qAlpha(~d2) */
    addiu   a2, -2
    INTERPOLATE_PIXEL_255 t0, t6, t2, t4, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t1, t7, t3, t5, s0, t9, t8, s1, s2, s3, s4
    addiu   a1, 8
    sw      AT, 0(a0)
    sw      s0, 4(a0)
    bnez    a2, 2b
     addiu  a0, 8
3:
    lw      s0, 0(sp)
    lw      s1, 4(sp)
    lw      s2, 8(sp)
    lw      s3, 12(sp)
    lw      s4, 16(sp)
    addiu   sp, sp, 24
    jr      ra
     nop
    .set at
END(comp_func_XOR_dsp_asm_x2)


/*
 * comp_func_solid_SourceOut_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint color
 * a3 - uint const_alpha
 *
 * const_alpha == 255: dest = BYTE_MUL(color, qAlpha(~dest))
 * otherwise:          color = BYTE_MUL(color, const_alpha);
 *                     dest = INTERPOLATE_PIXEL_255(color, qAlpha(~d), d, cia)
 */
LEAF_MIPS_DSP(comp_func_solid_SourceOut_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -16         /* 12 bytes used, padded to 8-byte multiple */
    sw      s0, 0(sp)
    sw      s1, 4(sp)
    sw      s2, 8(sp)
    beqz    a1, 3f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui     t8, 0xff00
    li      t0, 0xff
    beq     a3, t0, 2f
     ori    t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

    /* part where const_alpha != 255 */
1:
    replv.ph t0, a3
    li      t5, 0xff
    BYTE_MUL a2, a2, t0, t9, t1, t2, t3, t4
                                /* a2 = color ( = BYTE_MUL(color, const_alpha)); */
    subu    t1, t5, a3          /* t1 = cia = 255 - const_alpha */
11:
    lw      t2, 0(a0)           /* t2 = d1 */
    lw      s0, 4(a0)           /* s0 = d2 */
    addiu   a1, -2
    not     t3, t2
    not     s2, s0
    srl     t3, t3, 24          /* t3 = qAlpha(~d1) */
    srl     s2, s2, 24          /* s2 = qAlpha(~d2) */
    INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7
    sw      AT, 0(a0)
    sw      s1, 4(a0)
    bnez    a1, 11b
     addiu  a0, 8
    b       3f
     nop

    /* part where const_alpha = 255 */
2:
    lw      t0, 0(a0)           /* dest 1 */
    lw      t1, 4(a0)           /* dest 2 */
    not     t4, t0
    not     t5, t1
    srl     t4, t4, 24
    srl     t5, t5, 24
    replv.ph t2, t4             /* qAlpha(~d1) */
    replv.ph t3, t5             /* qAlpha(~d2) */
    addiu   a1, -2
    BYTE_MUL_x2 a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7
    sw      t8, 0(a0)
    sw      AT, 4(a0)
    bnez    a1, 2b
     addiu  a0, 8
3:
    lw      s0, 0(sp)
    lw      s1, 4(sp)
    lw      s2, 8(sp)
    addiu   sp, sp, 16
    jr      ra
     nop
    .set at
END(comp_func_solid_SourceOut_dsp_asm_x2)


/*
 * comp_func_SourceOut_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * const_alpha == 255: dest = BYTE_MUL(src, qAlpha(~dest))
 * otherwise:          s = BYTE_MUL(src, const_alpha);
 *                     dest = INTERPOLATE_PIXEL_255(s, qAlpha(~d), d, cia)
 */
LEAF_MIPS_DSP(comp_func_SourceOut_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -16
    sw      s0, 0(sp)
    sw      s1, 4(sp)
    sw      s2, 8(sp)
    sw      s3, 12(sp)
    beqz    a2, 3f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui     t8, 0xff00
    li      t0, 0xff
    beq     a3, t0, 2f
     ori    t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

    /* part where const_alpha != 255 */
1:
    li      t5, 0xff
    subu    t7, t5, a3          /* t7 = cia = 255 - const_alpha */
    replv.ph a3, a3
11:
    lw      t0, 0(a1)           /* t0 = src 1 */
    lw      t1, 4(a1)           /* t1 = src 2 */
    addiu   a2, -2
    BYTE_MUL_x2 t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0
    lw      t0, 0(a0)           /* t0 = dest 1 */
    lw      t1, 4(a0)           /* t1 = dest 2 */
    addiu   a1, 8
    not     t2, t0
    not     t3, t1
    srl     t2, t2, 24          /* t2 = qAlpha(~d1) */
    srl     t3, t3, 24          /* t3 = qAlpha(~d2) */
    INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3
    INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3
    sw      s1, 0(a0)
    sw      s2, 4(a0)
    bnez    a2, 11b
     addiu  a0, 8
    b       3f
     nop

    /* part where const_alpha = 255 */
2:
    lw      t2, 0(a0)           /* dest 1 */
    lw      t3, 4(a0)           /* dest 2 */
    lw      t0, 0(a1)           /* src 1 */
    lw      t1, 4(a1)           /* src 2 */
    not     t4, t2
    not     t5, t3
    srl     t4, t4, 24          /* qAlpha(~d1) */
    srl     t5, t5, 24          /* qAlpha(~d2) */
    replv.ph t2, t4
    replv.ph t3, t5
    addiu   a2, -2
    BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7
    addiu   a1, 8
    sw      t8, 0(a0)
    sw      AT, 4(a0)
    bnez    a2, 2b
     addiu  a0, 8
3:
    lw      s0, 0(sp)
    lw      s1, 4(sp)
    lw      s2, 8(sp)
    lw      s3, 12(sp)
    addiu   sp, sp, 16
    jr      ra
     nop
    .set at
END(comp_func_SourceOut_dsp_asm_x2)


/*
 * comp_func_Source_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * dest = INTERPOLATE_PIXEL_255(src, const_alpha, dest, 255 - const_alpha)
 */
LEAF_MIPS_DSP(comp_func_Source_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -8
    sw      s0, 0(sp)
    sw      s1, 4(sp)
    beqz    a2, 2f
     nop
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui     t8, 0xff00
    ori     t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */
    li      t7, 0xff
    subu    t7, t7, a3          /* t7 = ialpha = 255 - const_alpha */
1:
    lw      t0, 0(a0)           /* t0 = dest 1 */
    lw      t1, 4(a0)           /* t1 = dest 2 */
    lw      t2, 0(a1)           /* t2 = src 1 */
    lw      t3, 4(a1)           /* t3 = src 2 */
    addiu   a2, -2
    addiu   a1, 8
    INTERPOLATE_PIXEL_255 t2, a3, t0, t7, AT, t9, t8, t4, t5, t6, s1
    INTERPOLATE_PIXEL_255 t3, a3, t1, t7, s0, t9, t8, t4, t5, t6, s1
    sw      AT, 0(a0)
    sw      s0, 4(a0)
    bnez    a2, 1b
     addiu  a0, 8
2:
    lw      s0, 0(sp)
    lw      s1, 4(sp)
    addiu   sp, sp, 8
    jr      ra
     nop
    .set at
END(comp_func_Source_dsp_asm_x2)


/*
 * qt_blend_argb32_on_argb32_mips_dsp_asm_x2
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * s = BYTE_MUL(src, const_alpha);
 * dest = s + BYTE_MUL(dest, qAlpha(~s))
 */
LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_mips_dsp_asm_x2)
    .set noat
    addiu   sp, sp, -16         /* 12 bytes used, padded to 8-byte multiple */
    sw      s0, 0(sp)
    sw      s1, 4(sp)
    sw      s2, 8(sp)
    beqz    a2, 2f
     nop
    replv.ph a3, a3
    li      t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
1:
    lw      t0, 0(a1)           /* t0 = src 1 */
    lw      t1, 4(a1)           /* t1 = src 2 */
    addiu   a2, -2
    BYTE_MUL_x2 t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0
    lw      t0, 0(a0)           /* t0 = dest 1 */
    lw      t1, 4(a0)           /* t1 = dest 2 */
    not     s1, AT
    not     s2, t7
    srl     s1, s1, 24          /* s1 = qAlpha(~s1) */
    srl     s2, s2, 24          /* s2 = qAlpha(~s2) */
    replv.ph s1, s1
    replv.ph s2, s2
    BYTE_MUL_x2 t0, t1, t2, t3, s1, s2, t9, t4, t5, t6, s0
    addiu   a1, 8
    addu    AT, AT, t2
    addu    t7, t7, t3
    sw      AT, 0(a0)
    sw      t7, 4(a0)
    bnez    a2, 1b
     addiu  a0, 8
2:
    lw      s0, 0(sp)
    lw      s1, 4(sp)
    lw      s2, 8(sp)
    addiu   sp, sp, 16
    jr      ra
     nop
    .set at
END(qt_blend_argb32_on_argb32_mips_dsp_asm_x2)


/*
 * qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 *
 * One pixel per iteration:
 *   s opaque  -> dst = s
 *   s == 0    -> dst untouched
 *   otherwise -> dst = s + BYTE_MUL(dst, ~qAlpha(s)); the multiply is
 *                skipped when dst is 0.
 */
LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)
    beqz    a2, 5f
     nop
    li      t7, 0x800080        /* t7 = 0x800080 */
    b       2f
     nop
1:                              /* source pixel is 0: leave dst untouched */
    addiu   a0, a0, 4
    addiu   a2, a2, -1
    beqz    a2, 5f
     nop
2:
    lw      t0, 0(a1)           /* t0 = s = src[i] */
    addiu   a1, a1, 4
    nor     t1, t0, zero
    srl     t1, t1, 24          /* t1 = ~qAlpha(s) */
    bnez    t1, 3f
     nop
    sw      t0, 0(a0)           /* dst[i] = src[i] */
    addiu   a2, a2, -1
    bnez    a2, 2b
     addiu  a0, a0, 4
    b       5f
     nop
3:
    beqz    t0, 1b
     replv.ph t6, t1            /* | 0 | qAlpha(~s) | 0 | qAlpha(~s) | */
    lw      t4, 0(a0)
    addiu   a2, a2, -1
    beqz    t4, 31f             /* dst == 0: BYTE_MUL result is 0 */
     move   t8, zero
    BYTE_MUL t4, t8, t6, t7, t1, t2, t3, t4
31:
    addu    t8, t0, t8          /* dst[i] = s + BYTE_MUL(dst[i], ~qAlpha(s)) */
    sw      t8, 0(a0)
    bnez    a2, 2b
     addiu  a0, a0, 4
5:
    jr      ra
     nop
END(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)


/*
 * Endian-dependent helpers for the halfword-misaligned copy loops below.
 * o is the byte offset of a halfword relative to base register b;
 * SWLO/SWHI store the low/high halfword of r, LDHI/LDLO load a word whose
 * high/low halfword is the one wanted, PACK merges two adjacent halfwords.
 */
#if defined(__MIPSEL) && __MIPSEL
# define PACK(r, s, t)  packrl.ph r, s, t
# define SWHI(r, o, b)  swl r, o + 1 (b)
# define SWLO(r, o, b)  swr r, o + 0 (b)
# define LDHI(r, o, b)  lwl r, o + 1 (b)
# define LDLO(r, o, b)  lwr r, o + 2 (b)
#else
# define PACK(r, s, t)  packrl.ph r, t, s
# define SWHI(r, o, b)  swr r, o + 1 (b)
# define SWLO(r, o, b)  swl r, o + 0 (b)
# define LDHI(r, o, b)  lwr r, o + 1 (b)
# define LDLO(r, o, b)  lwl r, o + 2 (b)
#endif

/*
 * qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm
 *
 * a0 - dst (*r5g6b5)
 * a1 - src (const *r5g6b5)
 * a2 - len (unsigned int)
 *
 * Plain copy of len 16-bit pixels from src to dst.
 *
 * Register usage:
 *  t0-3 - Scratch registers
 *  t4   - Number of iterations to do in unrolled loops
 *  t5-7 - Auxiliary scratch registers.
 *
 * Check if base addresses of src/dst are word aligned, cases:
 *  a) Both aligned.
 *  b) Both unaligned:
 *     1. Copy a halfword
 *     2. Use aligned case.
 *  c) dst aligned, src unaligned:
 *     1. Read words from src at halfword offsets, merge with PACK.
 *     2. Store whole words to dst.
 *  d) dst unaligned, src aligned:
 *     1. Read whole words from src.
 *     2. Store first/last halfword separately, PACKed words in between.
 */
LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm)
    beqz    a2, 0f              /* if (a2:len == 0): return */
     andi   t0, a0, 0x3         /* t0 = a0:dst % 4 */
    andi    t1, a1, 0x3         /* t1 = a1:src % 4 */
    or      t2, t0, t1          /* t2 = t0 | t1 */
    beqz    t2, 4f              /* both aligned */
     nop
    beqz    t0, 3f              /* dst aligned, src unaligned */
     nop
    beqz    t1, 2f              /* src aligned, dst unaligned */
     nop

    /*
     * Both src/dst are unaligned: copy 1 halfword, then fall through
     * to continue with the word-aligned copy.
     */
    lhu     t0, 0 (a1)          /* t0 <- ((uint16_t*) src)[0] */
    addiu   a1, a1, 2           /* src++ */
    addiu   a2, a2,-1           /* len-- */
    sh      t0, 0 (a0)          /* t0 -> ((uint16_t*) dst)[0] */
    addiu   a0, a0, 2           /* dst++ */

    /*
     * Both src/dst pointers are word-aligned, process eight
     * items at a time in an unrolled loop.
     */
4:
    beqz    a2, 0f              /* if (len == 0): return */
     srl    t4, a2, 3           /* t4 = len / 8 */
    beqz    t4, 5f              /* if (t4 == 0): tail */
     andi   a2, a2, 0x07        /* len = len % 8 */
1:
    lw      t0, 0 (a1)
    lw      t1, 4 (a1)
    lw      t2, 8 (a1)
    lw      t3, 12 (a1)
    addiu   t4, t4, -1          /* t4-- */
    addiu   a1, a1, 16          /* src += 8 */
    sw      t0, 0 (a0)
    sw      t1, 4 (a0)
    sw      t2, 8 (a0)
    sw      t3, 12 (a0)
    bnez    t4, 1b
     addiu  a0, a0, 16          /* dst += 8 */
    b       5f
     nop

    /*
     * dst pointer is unaligned
     */
2:
    beqz    a2, 0f              /* if (len == 0): return */
     srl    t4, a2, 3           /* t4 = len / 8 */
    beqz    t4, 5f              /* if (t4 == 0): tail */
     andi   a2, a2, 0x07        /* len = len % 8 */
1:
    lw      t0, 0 (a1)
    lw      t1, 4 (a1)
    lw      t2, 8 (a1)
    lw      t3, 12 (a1)
    addiu   t4, t4, -1          /* t4-- */
    addiu   a1, a1, 16          /* src += 8 */
    SWLO    (t0, 0, a0)         /* first halfword on its own */
    PACK    (t5, t1, t0)
    PACK    (t6, t2, t1)
    PACK    (t7, t3, t2)
    SWHI    (t3, 14, a0)        /* last halfword on its own */
    sw      t5, 2 (a0)
    sw      t6, 6 (a0)
    sw      t7, 10 (a0)
    bnez    t4, 1b
     addiu  a0, a0, 16          /* dst += 8 */
    b       5f
     nop

    /*
     * src pointer is unaligned
     */
3:
    beqz    a2, 0f              /* if (len == 0): return */
     srl    t4, a2, 3           /* t4 = len / 8 */
    beqz    t4, 5f              /* if (t4 == 0): tail */
     andi   a2, a2, 0x07        /* len = len % 8 */
1:
    LDHI    (t0, 0, a1)
    lw      t1, 2 (a1)
    lw      t2, 6 (a1)
    lw      t3, 10 (a1)
    LDLO    (t5, 12, a1)
    addiu   t4, t4, -1          /* t4-- */
    addiu   a1, a1, 16          /* src += 8 */
    PACK    (t0, t1, t0)
    PACK    (t6, t2, t1)
    PACK    (t7, t3, t2)
    sw      t0, 0 (a0)
    PACK    (t0, t5, t3)
    sw      t6, 4 (a0)
    sw      t7, 8 (a0)
    sw      t0, 12 (a0)
    bnez    t4, 1b
     addiu  a0, a0, 16          /* dst += 8 */

5:
    /* Process remaining items (a2:len < 8), one at a time */
    beqz    a2, 0f
     nop
1:
    lhu     t0, 0 (a1)          /* t0 <- ((uint16_t*) src)[0] */
    addiu   a2, a2,-1           /* len-- */
    addiu   a1, a1, 2           /* src++ */
    sh      t0, 0 (a0)          /* t0 -> ((uint16_t*) dst)[0] */
    bnez    a2, 1b              /* if (len != 0): loop */
     addiu  a0, a0, 2           /* dst++ */
0:
    jr      ra
     nop
END(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm)

#undef LDHI
#undef LDLO
#undef PACK
#undef SWHI
#undef SWLO


LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_mips_dsp_asm)
/*
 * Blends a run of 16-bit (r5g6b5) source pixels onto the destination
 * with a constant alpha:
 *
 *     a      = (alpha * 255) >> 8
 *     ia     = 255 - a
 *     dst[i] = scale(src[i], a) + scale(dst[i], ia)
 *
 * where
 *
 *     scale(x, k) =   ((((x & 0x07e0) *  (k + 1))       >> 8) & 0x07e0)
 *                   | ((((x & 0xf81f) * ((k + 1) >> 2)) >> 6) & 0xf81f)
 *
 * a0 - dst (*r5g6b5)
 * a1 - src (const *r5g6b5)
 * a2 - len (unsigned int) - batch length
 * a3 - alpha (int)
 *
 * Modifies a0-a3, t0-t5, t8 and t9. Returns immediately when len == 0.
 */

    beqz    a2, 2f                  /* nothing to do for an empty batch */
     li     t9, 255                 /* (branch delay slot) */
    sll     t8, a3, 8
    subu    a3, t8, a3              /* a3 = alpha * 256 - alpha = alpha * 255 */
    srl     a3, a3, 8               /* a3 = a  = (alpha * 255) >> 8 */
    subu    t9, t9, a3              /* t9 = ia = 255 - a */
    addiu   a3, a3, 1               /* a3 = a + 1          (0x07e0 field multiplier, src) */
    srl     t4, a3, 2               /* t4 = (a + 1) >> 2   (0xf81f fields multiplier, src) */
    addiu   t9, t9, 1               /* t9 = ia + 1         (0x07e0 field multiplier, dst) */
    srl     t5, t9, 2               /* t5 = (ia + 1) >> 2  (0xf81f fields multiplier, dst) */
1:
    lhu     t0, 0(a1)               /* t0 = src pixel */
    lhu     t1, 0(a0)               /* t1 = dst pixel */
    addiu   a2, a2, -1              /* len-- */
    andi    t2, t0, 0x07e0          /* t2 = src, middle 6-bit field */
    andi    t0, t0, 0xf81f          /* t0 = src, upper and lower 5-bit fields */
    mul     t2, t2, a3
    mul     t0, t0, t4
    andi    t3, t1, 0x07e0          /* t3 = dst, middle 6-bit field */
    andi    t1, t1, 0xf81f          /* t1 = dst, upper and lower 5-bit fields */
    mul     t3, t3, t9
    mul     t1, t1, t5
    addiu   a1, a1, 2               /* src++ */
    srl     t2, t2, 8
    srl     t0, t0, 6
    andi    t2, t2, 0x07e0          /* drop the bits that spilled out of the field */
    andi    t0, t0, 0xf81f
    or      t0, t0, t2              /* t0 = scale(src, a) */
    srl     t3, t3, 8
    srl     t1, t1, 6
    andi    t3, t3, 0x07e0
    andi    t1, t1, 0xf81f
    or      t1, t1, t3              /* t1 = scale(dst, ia) */
    addu    t0, t0, t1
    sh      t0, 0(a0)               /* store the blended pixel */
    bgtz    a2, 1b                  /* loop while len > 0 */
     addiu  a0, a0, 2               /* dst++ (branch delay slot) */
2:
    jr      ra
     nop

END(qt_blend_rgb16_on_rgb16_mips_dsp_asm)


LEAF_MIPS_DSP(fetchUntransformed_888_asm_mips_dsp)
/*
 * Expands packed 3-byte pixels into 32-bit words with the top byte forced
 * to 0xff. Source byte 0 lands in bits 23..16, byte 1 in bits 15..8 and
 * byte 2 in bits 7..0 of each output word (R, G, B in the layout comments).
 *
 * a0 - dst address (address of 32-bit aRGB value)
 * a1 - src address
 * a2 - length
 *
 * If length is odd, one pixel is converted with byte loads first; the rest
 * is converted two pixels per iteration. Two loop variants exist so that
 * every halfword load falls on an even address: loop 2 is taken when a1 is
 * odd, loop 3 when a1 is even. Each iteration advances a1 by 6, so the
 * parity checked at label 1 holds for the whole run.
 *
 * NOTE: the register-layout comments assume little-endian halfword loads.
 *
 * Modifies a0-a2, t0-t6, t8 and v1.
 */

    beqz    a2, 4f                  /* nothing to do for length == 0 */
     lui    t8, 0xff00              /* t8 = 0xff000000 (branch delay slot) */
    andi    t0, a2, 0x1
    beqz    t0, 1f                  /* even length: straight to the pair loops */
     nop
/* case for one pixel */
    lbu     t1, 0(a1)               /* t1 = R */
    lbu     v1, 2(a1)               /* v1 = B */
    lbu     t0, 1(a1)               /* t0 = G */
    addiu   a1, a1, 3
    addiu   a2, a2, -1
    sll     t1, t1, 0x10
    or      v1, v1, t8
    sll     t0, t0, 0x8
    or      v1, v1, t1
    or      v1, v1, t0              /* v1 = | ff | R  | G  | B  | */
    sw      v1, 0(a0)
    addiu   a0, a0, 4

    beqz    a2, 4f                  /* only one pixel is present (length = 1) */
     nop
1:
    andi    t0, a1, 0x1             /* choose the loop by the parity of src */
    beqz    t0, 3f
     nop
2:                                  /* a1 is odd here */
    lbu     t0, 0(a1)               /* t0 = | 0  | 0  | 0  | R1 | */
    lhu     t1, 1(a1)               /* t1 = | 0  | 0  | B1 | G1 | */
    addiu   a1, a1, 3
    lhu     t2, 0(a1)               /* t2 = | 0  | 0  | G2 | R2 | */
    lbu     t3, 2(a1)               /* t3 = | 0  | 0  | 0  | B2 | */
    sll     t0, t0, 16
    or      t0, t0, t8              /* t0 = | ff | R1 | 0  | 0  | */
    shll.ph t4, t1, 8               /* t4 = | 0  | 0  | G1 | 0  | */
    srl     t5, t1, 8
    or      t4, t4, t5              /* t4 = | 0  | 0  | G1 | B1 | */
    or      t0, t0, t4              /* t0 = | ff | R1 | G1 | B1 | */
    shll.ph t4, t2, 8               /* t4 = | 0  | 0  | R2 | 0  | */
    srl     t5, t2, 8               /* t5 = | 0  | 0  | 0  | G2 | */
    or      t4, t4, t5
    sll     t4, t4, 8               /* t4 = | 0  | R2 | G2 | 0  | */
    or      t5, t3, t8
    or      t2, t4, t5              /* t2 = | ff | R2 | G2 | B2 | */
    sw      t0, 0(a0)
    addiu   a1, a1, 3
    sw      t2, 4(a0)
    addiu   a2, a2, -2
    bnez    a2, 2b
     addiu  a0, a0, 8               /* dst += 2 (branch delay slot) */
    b       4f
     nop
3:                                  /* a1 is even here */
    lhu     t0, 0(a1)               /* t0 = | 0  | 0  | G1 | R1 | */
    lbu     t1, 2(a1)               /* t1 = | 0  | 0  | 0  | B1 | */
    addiu   a1, a1, 3
    lbu     t2, 0(a1)               /* t2 = | 0  | 0  | 0  | R2 | */
    lhu     t3, 1(a1)               /* t3 = | 0  | 0  | B2 | G2 | */
    srl     t4, t0, 8               /* t4 = | 0  | 0  | 0  | G1 | */
    shll.ph t5, t0, 8               /* t5 = | 0  | 0  | R1 | 0  | */
    or      t0, t4, t5
    sll     t6, t0, 8               /* t6 = | 0  | R1 | G1 | 0  | */
    or      t4, t1, t8              /* t4 = | ff | 0  | 0  | B1 | */
    or      t0, t6, t4              /* t0 = | ff | R1 | G1 | B1 | */
    sll     t2, t2, 16              /* t2 = | 0  | R2 | 0  | 0  | */
    srl     t4, t3, 8               /* t4 = | 0  | 0  | 0  | B2 | */
    shll.ph t5, t3, 8               /* t5 = | 0  | 0  | G2 | 0  | */
    or      t3, t4, t5              /* t3 = | 0  | 0  | G2 | B2 | */
    or      t2, t2, t3
    or      t2, t2, t8              /* t2 = | ff | R2 | G2 | B2 | */
    sw      t0, 0(a0)
    addiu   a1, a1, 3
    sw      t2, 4(a0)
    addiu   a2, a2, -2
    bnez    a2, 3b
     addiu  a0, a0, 8               /* dst += 2 (branch delay slot) */
4:
    jr      ra
     nop

END(fetchUntransformed_888_asm_mips_dsp)


LEAF_MIPS_DSP(fetchUntransformed_444_asm_mips_dsp)
/*
 * Expands 16-bit pixels holding 4-bit R, G and B nibbles (bits 11..8,
 * 7..4 and 3..0) into 32-bit words. Each nibble is duplicated into both
 * halves of its output byte and the top output byte is forced to 0xff;
 * the top nibble of the source pixel is ignored.
 *
 * a0 - dst address (address of 32-bit aRGB value)
 * a1 - src address
 * a2 - length
 *
 * v0 - on return, the original dst address
 *
 * Single pixels are converted until the count is even and src is word
 * aligned; then two pixels are converted per iteration from one word load,
 * and a possible last pixel is handled at label 4.
 *
 * The single-pixel paths use t5 as scratch, not v0, so that the return
 * value set up at entry is still intact at the final jr on every path.
 *
 * Modifies a0-a3, t0-t5, t7, t8 and v1.
 */

    lui     t8, 0xff00              /* t8 = 0xff000000 */
    li      t4, 0x1                 /* t4 = 1, compared with the count below */
    beqz    a2, 5f
     move   v0, a0                  /* just return the address of buffer
                                     * for storing returning values */
    andi    t0, a2, 0x1
    beqz    t0, 2f                  /* there is more than one pixel
                                     * (check src memory alignment (word)) */
     nop
1:
    lhu     t5, 0(a1)               /* t5 = source pixel */
    addiu   a1, a1, 2
    addiu   a2, a2, -1
    andi    t0, t5, 0xf00           /* t0 = R << 8 */
    andi    v1, t5, 0xf             /* v1 = B */
    andi    t5, t5, 0xf0            /* t5 = G << 4 */
    sra     t3, t0, 0x4             /* t3 = R << 4 */
    sra     t1, t5, 0x4             /* t1 = G */
    sra     t0, t0, 0x8             /* t0 = R */
    sll     t2, v1, 0x4             /* t2 = B << 4 */
    or      t0, t0, t3              /* t0 = RR */
    or      t5, t1, t5              /* t5 = GG */
    lui     t1, 0xff00
    or      v1, t2, v1              /* v1 = BB */
    sll     t0, t0, 0x10
    or      v1, v1, t1
    sll     t5, t5, 0x8
    or      v1, v1, t0
    or      t5, v1, t5              /* t5 = | ff | RR | GG | BB | */
    sw      t5, 0(a0)
    addiu   a0, a0, 4
    beqz    a2, 5f                  /* no more pixels for processing */
     nop
    beq     a2, t4, 4f              /* only one more pixel remained */
     nop
/* check if src memory address is word aligned */
2:
    andi    t0, a1, 0x3
    beqz    t0, 3f                  /* memory is word aligned */
     andi   a3, a2, 0x1             /* set the a3 register as the comparison
                                     * for ending the unrolled loop
                                     * (1 if odd, 0 if even) */
    b       1b                      /* not word aligned,
                                     * go another turn with
                                     * just one pixel processing */
     nop
3:
    lw      t0, 0(a1)               /* load two pixels at once */
    addiu   a2, a2, -2
    preceu.ph.qbr t1, t0            /* t1 = | 0  | aR1  | 0    | G1B1 | */
    preceu.ph.qbl t2, t0            /* t2 = | 0  | aR2  | 0    | G2B2 | */
    shll.qb t3, t1, 4               /* t3 = | 0  | R1 0 | 0    | B1 0 | */
    srl     t4, t3, 4               /* (t4 is reused as scratch from here on) */
    or      t0, t3, t4              /* t0 = | 0  | R1R1 | 0    | B1B1 | */
    andi    t3, t1, 0xf0
    sll     t3, t3, 8
    srl     t4, t3, 4
    or      t1, t3, t4              /* t1 = | 0  | 0    | G1G1 | 0    | */
    or      t0, t0, t1              /* t0 = | 0  | R1R1 | G1G1 | B1B1 | */
    or      t0, t0, t8              /* t0 = | ff | R1R1 | G1G1 | B1B1 | */
    shll.qb t3, t2, 4               /* t3 = | 0  | R2 0 | 0    | B2 0 | */
    srl     t4, t3, 4
    or      t7, t3, t4              /* t7 = | 0  | R2R2 | 0    | B2B2 | */
    andi    t3, t2, 0xf0
    sll     t3, t3, 8
    srl     t4, t3, 4
    or      t1, t3, t4              /* t1 = | 0  | 0    | G2G2 | 0    | */
    or      t2, t7, t1              /* t2 = | 0  | R2R2 | G2G2 | B2B2 | */
    or      t2, t2, t8              /* t2 = | ff | R2R2 | G2G2 | B2B2 | */
    sw      t0, 0(a0)
    addiu   a1, a1, 4
    sw      t2, 4(a0)
    bne     a2, a3, 3b              /* loop until the count drops to a3 (0 or 1) */
     addiu  a0, a0, 8               /* dst += 2 (branch delay slot) */
    beqz    a2, 5f                  /* no more pixels for processing */
     nop
4:
/* one more pixel remained (after loop unrolling process finished) */
    lhu     t5, 0(a1)               /* t5 = source pixel */
    addiu   a1, a1, 2
    addiu   a2, a2, -1
    andi    t0, t5, 0xf00           /* t0 = R << 8 */
    andi    v1, t5, 0xf             /* v1 = B */
    andi    t5, t5, 0xf0            /* t5 = G << 4 */
    sra     t3, t0, 0x4             /* t3 = R << 4 */
    sra     t1, t5, 0x4             /* t1 = G */
    sra     t0, t0, 0x8             /* t0 = R */
    sll     t2, v1, 0x4             /* t2 = B << 4 */
    or      t0, t0, t3              /* t0 = RR */
    or      t5, t1, t5              /* t5 = GG */
    lui     t1, 0xff00
    or      v1, t2, v1              /* v1 = BB */
    sll     t0, t0, 0x10
    or      v1, v1, t1
    sll     t5, t5, 0x8
    or      v1, v1, t0
    or      t5, v1, t5              /* t5 = | ff | RR | GG | BB | */
    sw      t5, 0(a0)
    addiu   a0, a0, 4
5:
    jr      ra
     nop

END(fetchUntransformed_444_asm_mips_dsp)


LEAF_MIPS_DSP(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp)
/*
 * Converts 3-byte source items to 32-bit aRGB words, one per iteration.
 *
 * For each item, source bytes 0-1 are loaded as a halfword, byte-swapped,
 * and treated as a 5-6-5 colour (see the bit-layout comments); byte 2 is
 * used as alpha. Each colour field is widened to 8 bits by repeating its
 * top bits in the low bits, and every byte of the result is then limited
 * to the alpha value (no colour byte is stored larger than alpha).
 *
 * a0 - dst address
 * a1 - src address
 * a2 - length
 *
 * Modifies a0-a2 and t0-t4; cmpu.lt.qb also writes the DSP condition bits.
 */

    beqz    a2, 2f                  /* nothing to do for length == 0 */
     nop

1:
    ulh     t1, 0(a1)               /* unaligned halfword load of bytes 0-1 */
    lbu     t2, 2(a1)               /* t2 = alpha */
    addiu   a2, a2, -1
    wsbh    t1, t1                  /* swap the two bytes of the halfword */
    sll     t0, t1, 8               /* t0 = 00000000rrrrrggggggbbbbb00000000 */
    ins     t0, t1, 3, 16           /* t0 = 00000000rrrrrrrrrrggggggbbbbb000 */
    ins     t0, t1, 5, 11           /* t0 = 00000000rrrrrrrrggggggbbbbbbb000 */
    srl     t4, t1, 9               /* t4 = 0000000000000000000000000rrrrrgg */
    replv.qb t3, t2                 /* t3 = alpha replicated into all four bytes */
    ins     t0, t4, 8, 2            /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */
    ins     t0, t1, 3, 5            /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */
    srl     t4, t1, 2               /* t4 = 000000000000000000rrrrrggggggbbb */
    ins     t0, t4, 0, 3            /* t0 = 00000000rrrrrrrrggggggggbbbbbbbb */
    ins     t0, t2, 24, 8           /* t0 = aaaaaaaarrrrrrrrggggggggbbbbbbbb */
    cmpu.lt.qb t3, t0               /* per byte: flag set where alpha < byte */
    pick.qb t0, t3, t0              /* per byte: alpha where flagged, else keep */
    addiu   a1, a1, 3               /* src += 3 bytes */
    sw      t0, 0(a0)
    bgtz    a2, 1b                  /* loop while length > 0 */
     addiu  a0, a0, 4               /* dst++ (branch delay slot) */
2:
    jr      ra
     nop

END(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp)