diff options
author | Damir Tatalovic <dtatalovic@mips.com> | 2012-06-22 18:13:02 +0200 |
---|---|---|
committer | Qt by Nokia <qt-info@nokia.com> | 2012-07-03 01:55:44 +0200 |
commit | 614422c2e7396b27a9802b3a0d949dfd93ba688b (patch) | |
tree | 00ba3115c2d27f5bf5a95ea629e6618042624eaf /src/gui | |
parent | 60829b4a5692ebdd68b12830d7c722ccd31921ad (diff) |
MIPS DSP composition functions optimizations.
List of optimized routines:
- comp_func_DestinationOver
- comp_func_SourceIn
- comp_func_DestinationIn
- comp_func_DestinationOut
- comp_func_SourceAtop
- comp_func_DestinationAtop
- comp_func_XOR
- comp_func_SourceOut
- comp_func_solid_SourceOver
- comp_func_solid_DestinationOver
- comp_func_solid_SourceIn
- comp_func_solid_DestinationIn
- comp_func_solid_SourceAtop
- comp_func_solid_DestinationAtop
- comp_func_solid_XOR
- comp_func_solid_SourceOut
The previously optimized routines qt_blend_argb32_on_argb32_mips_dsp and
comp_func_Source_mips_dsp have been redesigned and rewritten.
The overall improvement, measured by running the tst_bench_blendbench benchmark
app from tests/benchmarks/gui/image/blendbench/, is 27%.
Change-Id: I6ab09b17cac10f4aded59787074ab4c89e72ccac
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Samuel Rødal <samuel.rodal@nokia.com>
Diffstat (limited to 'src/gui')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 17 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_mips_dsp.cpp | 338 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_mips_dsp_asm.S | 1303 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_mips_dsp_p.h | 82 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_mips_dspr2_asm.S | 53 | ||||
-rw-r--r-- | src/gui/painting/qt_mips_asm_dsp.h | 136 |
6 files changed, 1770 insertions, 159 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 08975daf71..985ef68401 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -5971,6 +5971,23 @@ void qInitDrawhelperAsm() #if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_asm_mips_dsp; functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_mips_dsp; + functionForMode_C[QPainter::CompositionMode_DestinationOver] = comp_func_DestinationOver_mips_dsp; + functionForMode_C[QPainter::CompositionMode_SourceIn] = comp_func_SourceIn_mips_dsp; + functionForMode_C[QPainter::CompositionMode_DestinationIn] = comp_func_DestinationIn_mips_dsp; + functionForMode_C[QPainter::CompositionMode_DestinationOut] = comp_func_DestinationOut_mips_dsp; + functionForMode_C[QPainter::CompositionMode_SourceAtop] = comp_func_SourceAtop_mips_dsp; + functionForMode_C[QPainter::CompositionMode_DestinationAtop] = comp_func_DestinationAtop_mips_dsp; + functionForMode_C[QPainter::CompositionMode_Xor] = comp_func_XOR_mips_dsp; + functionForMode_C[QPainter::CompositionMode_SourceOut] = comp_func_SourceOut_mips_dsp; + + functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_mips_dsp; + functionForModeSolid_C[QPainter::CompositionMode_DestinationOver] = comp_func_solid_DestinationOver_mips_dsp; + functionForModeSolid_C[QPainter::CompositionMode_SourceIn] = comp_func_solid_SourceIn_mips_dsp; + functionForModeSolid_C[QPainter::CompositionMode_DestinationIn] = comp_func_solid_DestinationIn_mips_dsp; + functionForModeSolid_C[QPainter::CompositionMode_SourceAtop] = comp_func_solid_SourceAtop_mips_dsp; + functionForModeSolid_C[QPainter::CompositionMode_DestinationAtop] = comp_func_solid_DestinationAtop_mips_dsp; + functionForModeSolid_C[QPainter::CompositionMode_Xor] = comp_func_solid_XOR_mips_dsp; + functionForModeSolid_C[QPainter::CompositionMode_SourceOut] = 
comp_func_solid_SourceOut_mips_dsp; qt_memfill32 = qt_memfill32_asm_mips_dsp; diff --git a/src/gui/painting/qdrawhelper_mips_dsp.cpp b/src/gui/painting/qdrawhelper_mips_dsp.cpp index b33329c090..ec1d7d2a2a 100644 --- a/src/gui/painting/qdrawhelper_mips_dsp.cpp +++ b/src/gui/painting/qdrawhelper_mips_dsp.cpp @@ -45,24 +45,6 @@ QT_BEGIN_NAMESPACE -#if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) - -extern "C" uint INTERPOLATE_PIXEL_255_asm_mips_dsp(uint x, uint a, uint y, uint b); - -extern "C" uint BYTE_MUL_asm_mips_dsp(uint x, uint a); - -extern "C" uint * destfetchARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length); - -extern "C" uint * qt_destStoreARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length); - -#if defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2) - -extern "C" uint INTERPOLATE_PIXEL_255_asm_mips_dspr2(uint x, uint a, uint y, uint b); - -extern "C" uint BYTE_MUL_asm_mips_dspr2(uint x, uint a); - -#endif // QT_COMPILER_SUPPORTS_MIPS_DSPR2 - void qt_blend_argb32_on_argb32_mips_dsp(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, @@ -80,32 +62,21 @@ void qt_blend_argb32_on_argb32_mips_dsp(uchar *destPixels, int dbpl, uint *dst = (uint *) destPixels; if (const_alpha == 256) { for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { - uint s = src[x]; - if (s >= 0xff000000) - dst[x] = s; - else if (s != 0) -#if !defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2) - dst[x] = s + BYTE_MUL_asm_mips_dsp(dst[x], qAlpha(~s)); -#else - dst[x] = s + BYTE_MUL_asm_mips_dspr2(dst[x], qAlpha(~s)); -#endif - } + qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm(dst, src, w); dst = (quint32 *)(((uchar *) dst) + dbpl); src = (const quint32 *)(((const uchar *) src) + sbpl); } } else if (const_alpha != 0) { const_alpha = (const_alpha * 255) >> 8; for (int y=0; y<h; ++y) { - for (int x=0; x<w; ++x) { -#if !defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2) - uint s = BYTE_MUL_asm_mips_dsp(src[x], const_alpha); - dst[x] = s + BYTE_MUL_asm_mips_dsp(dst[x], 
qAlpha(~s)); -#else - uint s = BYTE_MUL_asm_mips_dspr2(src[x], const_alpha); - dst[x] = s + BYTE_MUL_asm_mips_dspr2(dst[x], qAlpha(~s)); -#endif + if (h%2 > 0) { + uint s = BYTE_MUL(src[0], const_alpha); + dst[0] = s + BYTE_MUL(dst[0], qAlpha(~s)); + h--; + dst++; + src++; } + qt_blend_argb32_on_argb32_mips_dsp_asm_x2(dst, src, h, const_alpha); dst = (quint32 *)(((uchar *) dst) + dbpl); src = (const quint32 *)(((const uchar *) src) + sbpl); } @@ -145,13 +116,13 @@ void comp_func_Source_mips_dsp(uint *dest, const uint *src, int length, uint con ::memcpy(dest, src, length * sizeof(uint)); } else { int ialpha = 255 - const_alpha; - for (int i = 0; i < length; ++i) { -#if !defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2) - dest[i] = INTERPOLATE_PIXEL_255_asm_mips_dsp(src[i], const_alpha, dest[i], ialpha); -#else - dest[i] = INTERPOLATE_PIXEL_255_asm_mips_dspr2(src[i], const_alpha, dest[i], ialpha); -#endif + if (length%2 > 0) { + dest[0] = INTERPOLATE_PIXEL_255(src[0], const_alpha, dest[0], ialpha); + length--; + dest++; + src++; } + comp_func_Source_dsp_asm_x2(dest, src, length, const_alpha); } } @@ -171,6 +142,285 @@ void QT_FASTCALL qt_destStoreARGB32_mips_dsp(QRasterBuffer *rasterBuffer, int x, qt_destStoreARGB32_asm_mips_dsp(data, buffer, length); } -#endif // QT_COMPILER_SUPPORTS_MIPS_DSP +void QT_FASTCALL comp_func_solid_SourceOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha) +{ + if (const_alpha != 255) + color = BYTE_MUL(color, const_alpha); + if (length%2 > 0) { + dest[0] = color + BYTE_MUL(dest[0], qAlpha(~color)); + length--; + dest++; + } + comp_func_solid_Source_dsp_asm_x2(dest, length, color, qAlpha(~color)); +} + +void QT_FASTCALL comp_func_solid_DestinationOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha) +{ + if (const_alpha != 255) + color = BYTE_MUL(color, const_alpha); + if (length%2 > 0) { + uint d = dest[0]; + dest[0] = d + BYTE_MUL(color, qAlpha(~d)); + length--; + dest++; + } + 
comp_func_solid_DestinationOver_dsp_asm_x2(dest, length, color); +} + +void QT_FASTCALL comp_func_DestinationOver_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha) +{ + if (length%2 > 0) { + if (const_alpha == 255) { + uint d = dest[0]; + dest[0] = d + BYTE_MUL(src[0], qAlpha(~d)); + } else { + uint d = dest[0]; + uint s = BYTE_MUL(src[0], const_alpha); + dest[0] = d + BYTE_MUL(s, qAlpha(~d)); + } + length--; + dest++; + src++; + } + comp_func_DestinationOver_dsp_asm_x2(dest, src, length, const_alpha); +} + +void QT_FASTCALL comp_func_solid_SourceIn_mips_dsp(uint *dest, int length, uint color, uint const_alpha) +{ + if (length%2 > 0) { + if (const_alpha == 255) { + dest[0] = BYTE_MUL(color, qAlpha(dest[0])); + } else { + uint tmp_color = BYTE_MUL(color, const_alpha); + uint cia = 255 - const_alpha; + uint d = dest[0]; + dest[0] = INTERPOLATE_PIXEL_255(tmp_color, qAlpha(d), d, cia); + } + length--; + dest++; + } + comp_func_solid_SourceIn_dsp_asm_x2(dest, length, color, const_alpha); +} + +void QT_FASTCALL comp_func_SourceIn_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha) +{ + if (length%2 > 0) { + if (const_alpha == 255) { + dest[0] = BYTE_MUL(src[0], qAlpha(dest[0])); + } else { + uint cia = 255 - const_alpha; + uint d = dest[0]; + uint s = BYTE_MUL(src[0], const_alpha); + dest[0] = INTERPOLATE_PIXEL_255(s, qAlpha(d), d, cia); + } + length--; + dest++; + src++; + } + comp_func_SourceIn_dsp_asm_x2(dest, src, length, const_alpha); +} + +void QT_FASTCALL comp_func_solid_DestinationIn_mips_dsp(uint *dest, int length, uint color, uint const_alpha) +{ + uint a = qAlpha(color); + if (const_alpha != 255) { + a = BYTE_MUL(a, const_alpha) + 255 - const_alpha; + } + if (length%2 > 0) { + dest[0] = BYTE_MUL(dest[0], a); + length--; + dest++; + } + comp_func_solid_DestinationIn_dsp_asm_x2(dest, length, a); +} + +void QT_FASTCALL comp_func_DestinationIn_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha) +{ + if (length%2 > 
0) { + if (const_alpha == 255) { + dest[0] = BYTE_MUL(dest[0], qAlpha(src[0])); + } else { + int cia = 255 - const_alpha; + uint a = BYTE_MUL(qAlpha(src[0]), const_alpha) + cia; + dest[0] = BYTE_MUL(dest[0], a); + } + length--; + src++; + dest++; + } + comp_func_DestinationIn_dsp_asm_x2(dest, src, length, const_alpha); +} + +void QT_FASTCALL comp_func_solid_DestinationOut_mips_dsp(uint *dest, int length, uint color, uint const_alpha) +{ + uint a = qAlpha(~color); + if (const_alpha != 255) { + a = BYTE_MUL(a, const_alpha) + 255 - const_alpha; + } + if (length%2 > 0) { + dest[0] = BYTE_MUL(dest[0], a); + length--; + dest++; + } + comp_func_solid_DestinationIn_dsp_asm_x2(dest, length, a); +} + +void QT_FASTCALL comp_func_DestinationOut_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha) +{ + if (length%2 > 0) { + if (const_alpha == 255) { + dest[0] = BYTE_MUL(dest[0], qAlpha(~src[0])); + } else { + int cia = 255 - const_alpha; + uint sia = BYTE_MUL(qAlpha(~src[0]), const_alpha) + cia; + dest[0] = BYTE_MUL(dest[0], sia); + } + length--; + dest++; + src++; + } + comp_func_DestinationOut_dsp_asm_x2(dest, src, length, const_alpha); +} + +void QT_FASTCALL comp_func_solid_SourceAtop_mips_dsp(uint *dest, int length, uint color, uint const_alpha) +{ + if (const_alpha != 255) { + color = BYTE_MUL(color, const_alpha); + } + uint sia = qAlpha(~color); + if (length%2 > 0) { + dest[0] = INTERPOLATE_PIXEL_255(color, qAlpha(dest[0]), dest[0], sia); + length--; + dest++; + } + comp_func_solid_SourceAtop_dsp_asm_x2(dest, length, color, sia); +} + +void QT_FASTCALL comp_func_SourceAtop_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha) +{ + if (length%2 > 0) { + if (const_alpha == 255) { + uint s = src[0]; + uint d = dest[0]; + dest[0] = INTERPOLATE_PIXEL_255(s, qAlpha(d), d, qAlpha(~s)); + } else { + uint s = BYTE_MUL(src[0], const_alpha); + uint d = dest[0]; + dest[0] = INTERPOLATE_PIXEL_255(s, qAlpha(d), d, qAlpha(~s)); + } + length--; + dest++; + 
src++; + } + comp_func_SourceAtop_dsp_asm_x2(dest, src, length, const_alpha); +} + + +void QT_FASTCALL comp_func_solid_DestinationAtop_mips_dsp(uint *dest, int length, uint color, uint const_alpha) +{ + uint a = qAlpha(color); + if (const_alpha != 255) { + color = BYTE_MUL(color, const_alpha); + a = qAlpha(color) + 255 - const_alpha; + } + if (length%2 > 0) { + uint d = dest[0]; + dest[0] = INTERPOLATE_PIXEL_255(d, a, color, qAlpha(~d)); + length--; + dest++; + } + comp_func_solid_DestinationAtop_dsp_asm_x2(dest, length, color, a); +} + +void QT_FASTCALL comp_func_DestinationAtop_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha) +{ + if (length%2 > 0) { + if (const_alpha == 255) { + uint s = src[0]; + uint d = dest[0]; + dest[0] = INTERPOLATE_PIXEL_255(d, qAlpha(s), s, qAlpha(~d)); + } else { + int cia = 255 - const_alpha; + uint s = BYTE_MUL(src[0], const_alpha); + uint d = dest[0]; + uint a = qAlpha(s) + cia; + dest[0] = INTERPOLATE_PIXEL_255(d, a, s, qAlpha(~d)); + } + length--; + dest++; + src++; + } + comp_func_DestinationAtop_dsp_asm_x2(dest, src, length, const_alpha); +} + +void QT_FASTCALL comp_func_solid_XOR_mips_dsp(uint *dest, int length, uint color, uint const_alpha) +{ + if (const_alpha != 255) + color = BYTE_MUL(color, const_alpha); + uint sia = qAlpha(~color); + + if (length%2 > 0) { + uint d = dest[0]; + dest[0] = INTERPOLATE_PIXEL_255(color, qAlpha(~d), d, sia); + length--; + dest++; + } + comp_func_solid_XOR_dsp_asm_x2(dest, length, color, sia); +} + +void QT_FASTCALL comp_func_XOR_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha) +{ + if (length%2 > 0) { + if (const_alpha == 255) { + uint d = dest[0]; + uint s = src[0]; + dest[0] = INTERPOLATE_PIXEL_255(s, qAlpha(~d), d, qAlpha(~s)); + } else { + uint d = dest[0]; + uint s = BYTE_MUL(src[0], const_alpha); + dest[0] = INTERPOLATE_PIXEL_255(s, qAlpha(~d), d, qAlpha(~s)); + } + length--; + dest++; + src++; + } + comp_func_XOR_dsp_asm_x2(dest, src, length, 
const_alpha); +} + +void QT_FASTCALL comp_func_solid_SourceOut_mips_dsp(uint *dest, int length, uint color, uint const_alpha) +{ + if (length%2 > 0) { + if (const_alpha == 255) { + dest[0] = BYTE_MUL(color, qAlpha(~dest[0])); + } else { + uint tmp_color = BYTE_MUL(color, const_alpha); + int cia = 255 - const_alpha; + uint d = dest[0]; + dest[0] = INTERPOLATE_PIXEL_255(tmp_color, qAlpha(~d), d, cia); + } + length--; + dest++; + } + comp_func_solid_SourceOut_dsp_asm_x2(dest, length, color, const_alpha); +} + +void QT_FASTCALL comp_func_SourceOut_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha) +{ + if (length%2 > 0) { + if (const_alpha == 255) { + dest[0] = BYTE_MUL(src[0], qAlpha(~dest[0])); + } else { + int cia = 255 - const_alpha; + uint s = BYTE_MUL(src[0], const_alpha); + uint d = dest[0]; + dest[0] = INTERPOLATE_PIXEL_255(s, qAlpha(~d), d, cia); + } + length--; + dest++; + src++; + } + comp_func_SourceOut_dsp_asm_x2(dest, src, length, const_alpha); +} + QT_END_NAMESPACE diff --git a/src/gui/painting/qdrawhelper_mips_dsp_asm.S b/src/gui/painting/qdrawhelper_mips_dsp_asm.S index f426905aad..58cc176529 100644 --- a/src/gui/painting/qdrawhelper_mips_dsp_asm.S +++ b/src/gui/painting/qdrawhelper_mips_dsp_asm.S @@ -41,67 +41,6 @@ #include "qt_mips_asm_dsp.h" -LEAF_MIPS_DSP(INTERPOLATE_PIXEL_255_asm_mips_dsp) -/* - * a0 - uint x (First value to multiply) - * a1 - uint a (Multiplicator byte for first value) - * a2 - uint y (Second value to multiply) - * a3 - uint b (Multiplicator byte for second value) - */ - - .set reorder - li t4, 8388736 - preceu.ph.qbra t0, a0 /* (x & 0xff00ff) */ - mul t0, t0, a1 /* (x & 0xff00ff) * a */ - preceu.ph.qbra t1, a2 /* (y & 0xff00ff) */ - mul t1, t1, a3 /* (y & 0xff00ff) * b */ - addu t0, t0, t1 /* (x & 0xff00ff) * a + - * (y & 0xff00ff) * b - */ - preceu.ph.qbla t1, t0 /* (t >> 8) & 0xff00ff */ - addu t0, t0, t1 /* t + ((t >> 8) & 0xff00ff */ - addu t0, t0, t4 /* t + ((t >> 8) & 0xff00ff) + 0x800080 */ - 
preceu.ph.qbla t0, t0 /* t >> 8 and t&=0xff00ff */ - preceu.ph.qbla t2, a0 /* (x>>8) & 0xff00ff */ - mul t2, t2, a1 /* ((x>>8) & 0xff00ff) * a */ - preceu.ph.qbla t3, a2 /* ((y>>8) & 0xff00ff) */ - mul t3, t3, a3 /* ((y>>8) & 0xff00ff) * b */ - addu t2, t2, t3 /* ((x>>8) & 0xff00ff) * a + - * ((y >> 8) & 0xff00ff) * b - */ - preceu.ph.qbla t3, t2 /* (x>>8) & 0xff00ff */ - addu t2, t2, t3 /* (x>>8) & 0xff00ff) + 0x800080 */ - addu t2, t2, t4 /* x + ((x>>8) & 0xff00ff) + 0x800080 */ - and t2, t2, 0xff00ff00 - or t1, t0, t2 - move v0, t1 - j ra - -END(INTERPOLATE_PIXEL_255_asm_mips_dsp) - -LEAF_MIPS_DSP(BYTE_MUL_asm_mips_dsp) -/* - * a0 - uint x (Value to multiply) - * a1 - uint a (Multiplicator byte) - */ - - .set reorder - replv.ph a1, a1 /* a1 = 0x00a00a */ - li t4, 8388736 /* t4 = 0x800080 */ - muleu_s.ph.qbl t0, a0, a1 - muleu_s.ph.qbr t2, a0, a1 - preceu.ph.qbla t1, t0 - addu t0, t0, t1 - addu t0, t0, t4 - preceu.ph.qbla t3, t2 - addu t2, t2, t3 - addu t2, t2, t4 - precrq.qb.ph t4, t0, t2 - move v0, t4 - j ra - -END(BYTE_MUL_asm_mips_dsp) - LEAF_MIPS_DSP(destfetchARGB32_asm_mips_dsp) /* * a0 - buffer address (dst) @@ -349,7 +288,7 @@ LEAF_MIPS_DSP(comp_func_SourceOver_asm_mips_dsp) END(comp_func_SourceOver_asm_mips_dsp) -LEAF_MIPS_DSP(qt_destStoreARGB32_asm_mips_dsp) +LEAF_MIPS_DSPR2(qt_destStoreARGB32_asm_mips_dsp) /* * a0 - uint * data * a1 - const uint *buffer @@ -422,3 +361,1243 @@ LEAF_MIPS_DSP(qt_destStoreARGB32_asm_mips_dsp) nop END(qt_destStoreARGB32_asm_mips_dsp) + +LEAF_MIPS_DSP(comp_func_solid_Source_dsp_asm_x2) +/* + * a0 - const uint *dest + * a1 - int length + * a2 - uint color + * a3 - uint ialpha + */ + + beqz a1, 2f + nop + replv.ph a3, a3 + li t9, 8388736 /* t9 = 0x800080 */ +1: + lw t0, 0(a0) + lw t1, 4(a0) + or t2, t0, t1 /* if both dest are zero, no computation needed */ + beqz t2, 12f + addiu a1, -2 + + BYTE_MUL_x2 t0, t1, t6, t7, a3, a3, t9, t2, t3, t4, t5, 0 +11: + addu t2, a2, t6 + addu t3, a2, t7 + sw t2, 0(a0) + sw t3, 4(a0) + bnez a1, 
1b + addiu a0, 8 + b 2f +12: + addu t2, a2, t0 + addu t3, a2, t1 + sw t2, 0(a0) + sw t3, 4(a0) + bnez a1, 1b + addiu a0, 8 +2: + jr ra + nop + +END(comp_func_solid_Source_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_solid_DestinationOver_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - int length + * a2 - uint color + */ + + addiu sp, sp, -8 + sw s0, 0(sp) + sw s1, 4(sp) + beqz a1, 2f + nop + beqz a2, 2f + nop + li t9, 8388736 /* t4 = 0x800080 */ + +1: + lw t0, 0(a0) + lw t1, 4(a0) + not t2, t0 + not t3, t1 + srl t4, t2, 24 + srl t5, t3, 24 + or t2, t4, t5 /* if both dest are zero, no computation needed */ + beqz t2, 11f + addiu a1, -2 + replv.ph t2, t4 + replv.ph t3, t5 + + BYTE_MUL_x2 a2, a2, t8, a3, t2, t3, t9, t4, t5, t6, t7 + + addu t0, t0, t8 + addu t1, t1, a3 +11: + sw t0, 0(a0) + sw t1, 4(a0) + bnez a1, 1b + addiu a0, 8 + +2: + lw s0, 0(sp) + lw s1, 4(sp) + addiu sp, sp, 8 + jr ra + nop + +END(comp_func_solid_DestinationOver_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_DestinationOver_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - uint *src + * a2 - int length + * a3 - uint const_alpha + */ + + .set noat + addiu sp, sp, -8 + sw s0, 0(sp) + sw s1, 4(sp) + beqz a2, 3f + nop + li t9, 8388736 /* t4 = 0x800080 */ + li t0, 0xff + beq a3, t0, 2f + nop + +/* part where const_alpha != 255 */ +1: + replv.ph a3, a3 +11: + lw t0, 0(a1) # src_1 + lw t1, 4(a1) # src_2 + addiu a2, -2 + + BYTE_MUL_x2 t0, t1, t8, AT, a3, a3, t9, t4, t5, t6, t7, 0 + # t8 = s1 + # AT = s2 + lw t0, 0(a0) # dest_1 + lw t1, 4(a0) # dest_2 + addiu a1, 8 + not t2, t0 + not t3, t1 + srl t4, t2, 24 + srl t5, t3, 24 + replv.ph t2, t4 # qAlpha(~d) 1 + replv.ph t3, t5 # qAlpha(~d) 2 + + BYTE_MUL_x2 t8, AT, s0, s1, t2, t3, t9, t4, t5, t6, t7 + + addu t0, t0, s0 + addu t1, t1, s1 + sw t0, 0(a0) + sw t1, 4(a0) + bnez a2, 11b + addiu a0, 8 + b 3f + nop + +/* part where const_alpha = 255 */ +2: + lw t0, 0(a0) # dest 1 + lw t1, 4(a0) # dest 2 + lw s0, 0(a1) # src 1 + lw s1, 4(a1) # src 2 + not t2, t0 + not t3, t1 + srl t4, 
t2, 24 + srl t5, t3, 24 + replv.ph t2, t4 + replv.ph t3, t5 + addiu a1, 8 + addiu a2, -2 + + BYTE_MUL_x2 s0, s1, t8, AT, t2, t3, t9, t4, t5, t6, t7 + + addu t0, t0, t8 + addu t1, t1, AT + sw t0, 0(a0) + sw t1, 4(a0) + bnez a2, 2b + addiu a0, 8 + +3: + lw s0, 0(sp) + lw s1, 4(sp) + addiu sp, sp, 8 + jr ra + nop + .set at + +END(comp_func_DestinationOver_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_solid_SourceIn_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - int length + * a2 - uint color + * a3 - uint const_alpha + */ + + .set noat + addiu sp, -12 + sw s0, 0(sp) + sw s1, 4(sp) + sw s2, 8(sp) + beqz a1, 3f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + lui t8, 0xff00 + li t0, 0xff + beq a3, t0, 2f + ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ + +/* part where const_alpha != 255 */ +1: + replv.ph t0, a3 + li t5, 0xff + BYTE_MUL a2, a2, t0, t9, t1, t2, t3, t4 /* a2 = color ( = BYTE_MUL(color, const_alpha)); */ + subu t1, t5, a3 /* t1 = cia = 255 - const_alpha */ +11: + lw t2, 0(a0) /* t2 = d */ + lw s0, 4(a0) + addiu a1, -2 + srl t3, t2, 24 /* t3 = qAlpha(d) */ + srl s2, s0, 24 + + INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7 + INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7 + + sw AT, 0(a0) + sw s1, 4(a0) + bnez a1, 11b + addiu a0, 8 + b 3f + nop + +/* part where const_alpha = 255 */ +2: + lw t0, 0(a0) /* dest 1 */ + lw t1, 4(a0) /* dest 2 */ + srl t4, t0, 24 + srl t5, t1, 24 + replv.ph t2, t4 + replv.ph t3, t5 + addiu a1, -2 + + BYTE_MUL_x2 a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7 + + sw t8, 0(a0) + sw AT, 4(a0) + bnez a1, 2b + addiu a0, 8 + +3: + lw s0, 0(sp) + lw s1, 4(sp) + lw s2, 8(sp) + addiu sp, 12 + jr ra + nop + .set at + +END(comp_func_solid_SourceIn_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_SourceIn_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - const uint *src + * a2 - int length + * a3 - uint const_alpha + */ + + .set noat + addiu sp, -16 + sw s0, 0(sp) + sw s1, 4(sp) + sw s2, 8(sp) + sw s3, 12(sp) + 
beqz a2, 3f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + lui t8, 0xff00 + li t0, 0xff + beq a3, t0, 2f + ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ + +/* part where const_alpha != 255 */ +1: + li t5, 0xff + subu t7, t5, a3 /* t7 = cia = 255 - const_alpha */ + replv.ph a3, a3 +11: + lw t0, 0(a1) /* t0 = src 1 */ + lw t1, 4(a1) /* t1 = src 2 */ + addiu a2, -2 + + BYTE_MUL_x2 t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0 + + lw t0, 0(a0) /* t0 = dest 1 */ + lw t1, 4(a0) /* t1 = dest 2 */ + addiu a1, 8 + + srl t2, t0, 24 /* t2 = qAlpha(d) 1 */ + srl t3, t1, 24 /* t3 = qAlpha(d) 2 */ + + INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3 + INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3 + + sw s1, 0(a0) + sw s2, 4(a0) + bnez a2, 11b + addiu a0, 8 + b 3f + nop + +/* part where const_alpha = 255 */ +2: + lw t2, 0(a0) /* dest 1 */ + lw t3, 4(a0) /* dest 2 */ + lw t0, 0(a1) /* src 1 */ + lw t1, 4(a1) /* src 2 */ + srl t4, t2, 24 + srl t5, t3, 24 + replv.ph t2, t4 + replv.ph t3, t5 + addiu a2, -2 + + BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7 + + addiu a1, 8 + sw t8, 0(a0) + sw AT, 4(a0) + bnez a2, 2b + addiu a0, 8 + +3: + lw s0, 0(sp) + lw s1, 4(sp) + lw s2, 8(sp) + lw s3, 12(sp) + addiu sp, 16 + jr ra + nop + .set at + +END(comp_func_SourceIn_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_solid_DestinationIn_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - int length + * a2 - uint a + */ + + .set noat + beqz a1, 2f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + replv.ph a2, a2 +1: + lw t0, 0(a0) + lw t1, 4(a0) + addiu a1, -2 + + BYTE_MUL_x2 t0, t1, t8, AT, a2, a2, t9, t4, t5, t6, t7, 0 + + sw t8, 0(a0) + sw AT, 4(a0) + bnez a1, 1b + addiu a0, 8 +2: + jr ra + nop + .set at + +END(comp_func_solid_DestinationIn_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_DestinationIn_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - const uint *src + * a2 - int length + * a3 - uint const_alpha + */ + + addiu sp, -8 + sw s0, 
0(sp) + sw s1, 4(sp) + beqz a2, 3f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + li t0, 0xff + beq a3, t0, 2f + nop + +/* part where const_alpha != 255 */ +1: + li t5, 0xff + subu t8, t5, a3 /* t8 = cia = 255 - const_alpha */ + replv.ph a3, a3 +11: + lw t0, 0(a1) /* t0 = src 1 */ + lw t1, 4(a1) /* t1 = src 2 */ + addiu a2, -2 + srl t0, t0, 24 + srl t1, t1, 24 + + BYTE_MUL_x2 t0, t1, s1, t7, a3, a3, t9, t3, t4, t5, t6, 0 + + lw t0, 0(a0) /* t0 = dest 1 */ + lw t1, 4(a0) /* t1 = dest 2 */ + addu s1, s1, t8 /* a 1 */ + addu t7, t7, t8 /* a 2 */ + replv.ph t2, s1 + replv.ph t3, t7 + + BYTE_MUL_x2 t0, t1, s1, t7, t2, t3, t9, t4, t5, t6, s0 + + addiu a1, 8 + sw s1, 0(a0) + sw t7, 4(a0) + bnez a2, 11b + addiu a0, 8 + b 3f + nop + +/* part where const_alpha = 255 */ +2: + lw t2, 0(a1) /* src 1 */ + lw t3, 4(a1) /* src 2 */ + lw t0, 0(a0) /* dest 1 */ + lw t1, 4(a0) /* dest 2 */ + srl t4, t2, 24 + srl t5, t3, 24 + replv.ph t2, t4 /* t2 = qAlpha(src 1) */ + replv.ph t3, t5 /* t3 = qAlpha(src 2) */ + addiu a2, -2 + + BYTE_MUL_x2 t0, t1, t8, s1, t2, t3, t9, t4, t5, t6, t7 + + addiu a1, 8 + sw t8, 0(a0) + sw s1, 4(a0) + bnez a2, 2b + addiu a0, 8 + +3: + lw s0, 0(sp) + lw s1, 4(sp) + addiu sp, 8 + jr ra + nop + +END(comp_func_DestinationIn_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_DestinationOut_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - const uint *src + * a2 - int length + * a3 - uint const_alpha + */ + + .set noat + addiu sp, -4 + sw s0, 0(sp) + beqz a2, 3f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + li t0, 0xff + beq a3, t0, 2f + nop + +/* part where const_alpha != 255 */ +1: + li t5, 0xff + subu t8, t5, a3 /* t8 = cia = 255 - const_alpha */ + replv.ph a3, a3 +11: + lw t0, 0(a1) /* t0 = src 1 */ + lw t1, 4(a1) /* t1 = src 2 */ + not t0, t0 + not t1, t1 + addiu a2, -2 + srl t0, t0, 24 + srl t1, t1, 24 + + BYTE_MUL_x2 t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0 + + lw t0, 0(a0) /* t0 = dest 1 */ + lw t1, 4(a0) /* t1 = dest 2 */ + addu AT, 
AT, t8 /* a 1 */ + addu t7, t7, t8 /* a 2 */ + replv.ph t2, AT + replv.ph t3, t7 + + BYTE_MUL_x2 t0, t1, AT, t7, t2, t3, t9, t4, t5, t6, s0 + + addiu a1, 8 + sw AT, 0(a0) + sw t7, 4(a0) + bnez a2, 11b + addiu a0, 8 + b 3f + nop + +/* part where const_alpha = 255 */ +2: + lw t2, 0(a1) /* src 1 */ + lw t3, 4(a1) /* src 2 */ + not t2, t2 + not t3, t3 + lw t0, 0(a0) /* dest 1 */ + lw t1, 4(a0) /* dest 2 */ + srl t4, t2, 24 + srl t5, t3, 24 + replv.ph t2, t4 /* t2 = qAlpha(src 1) */ + replv.ph t3, t5 /* t3 = qAlpha(src 2) */ + addiu a2, -2 + + BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7 + + addiu a1, 8 + sw t8, 0(a0) + sw AT, 4(a0) + bnez a2, 2b + addiu a0, 8 + +3: + lw s0, 0(sp) + addiu sp, 4 + jr ra + nop + .set at + +END(comp_func_DestinationOut_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_solid_SourceAtop_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - int length + * a2 - uint color + * a3 - uint sia + */ + + .set noat + addu sp, -4 + sw s0, 0(sp) + beqz a1, 2f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + lui t8, 0xff00 + ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ +1: + lw t0, 0(a0) /* t0 = dest 1 */ + lw t1, 4(a0) /* t1 = dest 2 */ + addiu a1, -2 + srl t2, t0, 24 /* t2 = qAlpha(dest 1) */ + srl t3, t1, 24 /* t3 = qAlpha(dest 2) */ + + INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7 + INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7 + + sw AT, 0(a0) + sw s0, 4(a0) + bnez a1, 1b + addiu a0, 8 +2: + lw s0, 0(sp) + addiu sp, 4 + jr ra + nop + .set at + +END(comp_func_solid_SourceAtop_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_SourceAtop_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - const uint *src + * a2 - int length + * a3 - uint const_alpha + */ + + .set noat + addiu sp, -20 + sw s0, 0(sp) + sw s1, 4(sp) + sw s2, 8(sp) + sw s3, 12(sp) + sw s4, 16(sp) + beqz a2, 3f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + lui t8, 0xff00 + li t0, 0xff + beq a3, t0, 2f + ori t8, t8, 0xff00 /* t8 = 
0xff00ff00 (andi_factor) */ + +/* part where const_alpha != 255 */ +1: + replv.ph a3, a3 +11: + lw AT, 0(a1) /* src 1 */ + lw s0, 4(a1) /* src 2 */ + + BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0 + /* t0 = s */ + + lw t2, 0(a0) /* t2 = dest 1 */ + lw t3, 4(a0) /* t3 = dest 2 */ + + srl t4, t2, 24 /* t4 = qAplpha(dest 1) */ + srl t5, t3, 24 + not t6, t0 + not t7, t1 + srl t6, t6, 24 /* t6 = qAlpha(~s) */ + srl t7, t7, 24 + addiu a2, -2 + + INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4 + INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4 + + addiu a1, 8 + sw AT, 0(a0) + sw s0, 4(a0) + bnez a2, 11b + addiu a0, 8 + b 3f + nop + +/* part where const_alpha = 255 */ +2: + lw t2, 0(a0) /* dest 1 */ + lw t3, 4(a0) /* dest 2 */ + lw t0, 0(a1) /* src 1 */ + lw t1, 4(a1) /* src 2 */ + srl t4, t2, 24 + srl t5, t3, 24 + not t6, t0 + not t7, t1 + srl t6, t6, 24 + srl t7, t7, 24 + addiu a2, -2 + + INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4 + INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4 + + addiu a1, 8 + sw AT, 0(a0) + sw s0, 4(a0) + bnez a2, 2b + addiu a0, 8 + +3: + lw s0, 0(sp) + lw s1, 4(sp) + lw s2, 8(sp) + lw s3, 12(sp) + lw s4, 16(sp) + addiu sp, 20 + jr ra + nop + .set at + +END(comp_func_SourceAtop_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_solid_DestinationAtop_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - int length + * a2 - uint color + * a3 - uint a + */ + + .set noat + addiu sp, -4 + sw s0, 0(sp) + beqz a1, 2f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + lui t8, 0xff00 + ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ +1: + lw t0, 0(a0) /* t0 = dest 1 */ + lw t1, 4(a0) /* t1 = dest 2 */ + addiu a1, -2 + not t2, t0 + not t3, t1 + srl t2, t2, 24 /* t2 = qAlpha(~(dest 1)) */ + srl t3, t3, 24 /* t3 = qAlpha(~(dest 2)) */ + + INTERPOLATE_PIXEL_255 t0, a3, a2, t2, AT, t9, t8, t4, t5, t6, t7 + INTERPOLATE_PIXEL_255 t1, a3, a2, t3, s0, t9, t8, t4, t5, t6, t7 + + sw AT, 
0(a0) + sw s0, 4(a0) + bnez a1, 1b + addiu a0, 8 +2: + lw s0, 0(sp) + addiu sp, 4 + jr ra + nop + .set at + +END(comp_func_solid_DestinationAtop_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_DestinationAtop_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - const uint *src + * a2 - int length + * a3 - uint const_alpha + */ + + .set noat + addiu sp, -24 + sw s0, 0(sp) + sw s1, 4(sp) + sw s2, 8(sp) + sw s3, 12(sp) + sw s4, 16(sp) + sw s5, 20(sp) + beqz a2, 3f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + lui t8, 0xff00 + li t0, 0xff + beq a3, t0, 2f + ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ + +/* part where const_alpha != 255 */ +1: + li s5, 0xff + subu s5, s5, a3 /* s5 = cia = 255 - const_alpha */ + replv.ph a3, a3 +11: + lw AT, 0(a1) /* src 1 */ + lw s0, 4(a1) /* src 2 */ + + BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0 + /* t0 = s */ + + lw t2, 0(a0) /* t2 = dest 1 */ + lw t3, 4(a0) /* t3 = dest 2 */ + + not t4, t2 + not t5, t3 + srl t4, t4, 24 /* t4 = qAplpha(~(dest 1)) */ + srl t5, t5, 24 + srl t6, t0, 24 + srl t7, t1, 24 + addu t6, t6, s5 /* t6 = a = qAlpha(s1) + cia */ + addu t7, t7, s5 + addiu a2, -2 + + INTERPOLATE_PIXEL_255 t2, t6, t0, t4, AT, t9, t8, s1, s2, s3, s4 + INTERPOLATE_PIXEL_255 t3, t7, t1, t5, s0, t9, t8, s1, s2, s3, s4 + + addiu a1, 8 + sw AT, 0(a0) + sw s0, 4(a0) + bnez a2, 11b + addiu a0, 8 + b 3f + nop + +/* part where const_alpha = 255 */ +2: + lw t2, 0(a0) /* d1 */ + lw t3, 4(a0) /* d2 */ + lw t0, 0(a1) /* s1 */ + lw t1, 4(a1) /* s2 */ + srl t4, t0, 24 /* t4 = qAlpha(s1) */ + srl t5, t1, 24 + not t6, t2 + not t7, t3 + srl t6, t6, 24 /* qAlpha(~d1) */ + srl t7, t7, 24 + addiu a2, -2 + + INTERPOLATE_PIXEL_255 t2, t4, t0, t6, AT, t9, t8, s1, s2, s3, s4 + INTERPOLATE_PIXEL_255 t3, t5, t1, t7, s0, t9, t8, s1, s2, s3, s4 + + addiu a1, 8 + sw AT, 0(a0) + sw s0, 4(a0) + bnez a2, 2b + addiu a0, 8 + +3: + lw s0, 0(sp) + lw s1, 4(sp) + lw s2, 8(sp) + lw s3, 12(sp) + lw s4, 16(sp) + lw s5, 20(sp) + addiu sp, 24 + jr ra 
+ nop + .set at + +END(comp_func_DestinationAtop_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_solid_XOR_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - int length + * a2 - uint color + * a3 - uint sia + */ + + .set noat + addu sp, -4 + sw s0, 0(sp) + beqz a1, 2f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + lui t8, 0xff00 + ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ +1: + lw t0, 0(a0) /* t0 = dest 1 */ + lw t1, 4(a0) /* t1 = dest 2 */ + addiu a1, -2 + not t2, t0 + not t3, t1 + srl t2, t2, 24 /* t2 = qAlpha(~(dest 1)) */ + srl t3, t3, 24 /* t3 = qAlpha(~(dest 2)) */ + + INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7 + INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7 + + sw AT, 0(a0) + sw s0, 4(a0) + bnez a1, 1b + addiu a0, 8 +2: + lw s0, 0(sp) + addu sp, 4 + jr ra + nop + .set at + +END(comp_func_solid_XOR_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_XOR_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - const uint *src + * a2 - int length + * a3 - uint const_alpha + */ + + .set noat + addiu sp, -20 + sw s0, 0(sp) + sw s1, 4(sp) + sw s2, 8(sp) + sw s3, 12(sp) + sw s4, 16(sp) + beqz a2, 3f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + lui t8, 0xff00 + li t0, 0xff + beq a3, t0, 2f + ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ + +/* part where const_alpha != 255 */ +1: + replv.ph a3, a3 +11: + lw AT, 0(a1) /* src 1 */ + lw s0, 4(a1) /* src 2 */ + + BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0 + /* t0 = s1 */ + /* t1 = s2 */ + + lw t2, 0(a0) /* t2 = dest 1 */ + lw t3, 4(a0) /* t3 = dest 2 */ + + not t4, t2 + not t5, t3 + srl t4, t4, 24 /* t4 = qAplpha(~(dest 1)) */ + srl t5, t5, 24 + not t6, t0 + not t7, t1 + srl t6, t6, 24 /* t6 = qAlpha(~s) */ + srl t7, t7, 24 + addiu a2, -2 + + INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4 + INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4 + + addiu a1, 8 + sw AT, 0(a0) + sw s0, 4(a0) + bnez a2, 11b + addiu a0, 8 + b 
3f + nop + +/* part where const_alpha = 255 */ +2: + lw t2, 0(a0) /* d1 */ + lw t3, 4(a0) /* d2 */ + lw t0, 0(a1) /* s1 */ + lw t1, 4(a1) /* s2 */ + not t4, t0 + not t5, t1 + srl t4, t4, 24 /* t4 = qAlpha(~s1) */ + srl t5, t5, 24 /* t5 = qAlpha(~s2) */ + not t6, t2 + not t7, t3 + srl t6, t6, 24 /* t6 = qAlpha(~d1) */ + srl t7, t7, 24 /* t7 = qAlpha(~d2) */ + addiu a2, -2 + + INTERPOLATE_PIXEL_255 t0, t6, t2, t4, AT, t9, t8, s1, s2, s3, s4 + INTERPOLATE_PIXEL_255 t1, t7, t3, t5, s0, t9, t8, s1, s2, s3, s4 + + addiu a1, 8 + sw AT, 0(a0) + sw s0, 4(a0) + bnez a2, 2b + addiu a0, 8 + +3: + lw s0, 0(sp) + lw s1, 4(sp) + lw s2, 8(sp) + lw s3, 12(sp) + lw s4, 16(sp) + addiu sp, 20 + jr ra + nop + .set at + +END(comp_func_XOR_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_solid_SourceOut_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - int length (two pixels are consumed per iteration; presumably must be even - confirm in caller) + * a2 - uint color + * a3 - uint const_alpha + */ + + .set noat + addiu sp, -12 + sw s0, 0(sp) + sw s1, 4(sp) + sw s2, 8(sp) + beqz a1, 3f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + lui t8, 0xff00 + li t0, 0xff + beq a3, t0, 2f + ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ + +/* part where const_alpha != 255 */ +1: + replv.ph t0, a3 + li t5, 0xff + BYTE_MUL a2, a2, t0, t9, t1, t2, t3, t4 /* a2 = color ( = BYTE_MUL(color, const_alpha)); */ + subu t1, t5, a3 /* t1 = cia = 255 - const_alpha */ +11: + lw t2, 0(a0) /* t2 = d1 */ + lw s0, 4(a0) /* s0 = d2 */ + addiu a1, -2 + not t3, t2 + not s2, s0 + srl t3, t3, 24 /* t3 = qAlpha(~d1) */ + srl s2, s2, 24 /* s2 = qAlpha(~d2) */ + + INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7 + INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7 + + sw AT, 0(a0) + sw s1, 4(a0) + bnez a1, 11b + addiu a0, 8 + b 3f + nop + +/* part where const_alpha = 255 */ +2: + lw t0, 0(a0) /* dest 1 */ + lw t1, 4(a0) /* dest 2 */ + not t4, t0 + not t5, t1 + srl t4, t4, 24 /* t4 = qAlpha(~(dest 1)) */ + srl t5, t5, 24 /* t5 = qAlpha(~(dest 2)) */ + replv.ph t2, t4 + replv.ph t3, t5 + addiu a1, -2 + + BYTE_MUL_x2 a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7 /* t8 = BYTE_MUL(color, qAlpha(~(dest 1))), AT = BYTE_MUL(color, qAlpha(~(dest 2))); t8 (andi_factor) is reused as an output, this path has no INTERPOLATE_PIXEL_255 */ + + sw t8, 0(a0) + sw 
AT, 4(a0) + bnez a1, 2b + addiu a0, 8 + +3: + lw s0, 0(sp) + lw s1, 4(sp) + lw s2, 8(sp) + addiu sp, 12 + jr ra + nop + .set at + +END(comp_func_solid_SourceOut_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_SourceOut_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - const uint *src + * a2 - int length (two pixels are consumed per iteration; presumably must be even - confirm in caller) + * a3 - uint const_alpha + */ + + .set noat + addiu sp, -16 + sw s0, 0(sp) + sw s1, 4(sp) + sw s2, 8(sp) + sw s3, 12(sp) + beqz a2, 3f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + lui t8, 0xff00 + li t0, 0xff + beq a3, t0, 2f + ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ + +/* part where const_alpha != 255 */ +1: + li t5, 0xff + subu t7, t5, a3 /* t7 = cia = 255 - const_alpha */ + replv.ph a3, a3 +11: + lw t0, 0(a1) /* t0 = src 1 */ + lw t1, 4(a1) /* t1 = src 2 */ + addiu a2, -2 + + BYTE_MUL_x2 t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0 /* AT = BYTE_MUL(src 1, const_alpha), s0 = BYTE_MUL(src 2, const_alpha) */ + + lw t0, 0(a0) /* t0 = dest 1 */ + lw t1, 4(a0) /* t1 = dest 2 */ + addiu a1, 8 + + not t2, t0 + not t3, t1 + srl t2, t2, 24 /* t2 = qAlpha(~d1) */ + srl t3, t3, 24 /* t3 = qAlpha(~d2) */ + + INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3 + INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3 + + sw s1, 0(a0) + sw s2, 4(a0) + bnez a2, 11b + addiu a0, 8 + b 3f + nop + +/* part where const_alpha = 255 */ +2: + lw t2, 0(a0) /* dest 1 */ + lw t3, 4(a0) /* dest 2 */ + lw t0, 0(a1) /* src 1 */ + lw t1, 4(a1) /* src 2 */ + not t4, t2 + not t5, t3 + srl t4, t4, 24 /* t4 = qAlpha(~d1) */ + srl t5, t5, 24 /* t5 = qAlpha(~d2) */ + replv.ph t2, t4 + replv.ph t3, t5 + addiu a2, -2 + + BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7 /* t8 = BYTE_MUL(src 1, qAlpha(~d1)), AT = BYTE_MUL(src 2, qAlpha(~d2)); t8 (andi_factor) is reused as an output, this path has no INTERPOLATE_PIXEL_255 */ + + addiu a1, 8 + sw t8, 0(a0) + sw AT, 4(a0) + bnez a2, 2b + addiu a0, 8 + +3: + lw s0, 0(sp) + lw s1, 4(sp) + lw s2, 8(sp) + lw s3, 12(sp) + addiu sp, 16 + jr ra + nop + .set at + +END(comp_func_SourceOut_dsp_asm_x2) + +LEAF_MIPS_DSP(comp_func_Source_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - const uint *src + * a2 - int length + * a3 - uint const_alpha + */ + + .set 
noat + addiu sp, -8 + sw s0, 0(sp) + sw s1, 4(sp) + beqz a2, 2f + nop + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + lui t8, 0xff00 + ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ + li t7, 0xff + subu t7, t7, a3 /* t7 = ialpha = 255 - const_alpha */ +1: + lw t0, 0(a0) /* t0 = dest 1 */ + lw t1, 4(a0) /* t1 = dest 2 */ + lw t2, 0(a1) /* t2 = src 1 */ + lw t3, 4(a1) /* t3 = src 2 */ + addiu a2, -2 + addiu a1, 8 + + INTERPOLATE_PIXEL_255 t2, a3, t0, t7, AT, t9, t8, t4, t5, t6, s1 + INTERPOLATE_PIXEL_255 t3, a3, t1, t7, s0, t9, t8, t4, t5, t6, s1 + + sw AT, 0(a0) + sw s0, 4(a0) + bnez a2, 1b + addiu a0, 8 +2: + lw s0, 0(sp) + lw s1, 4(sp) + addiu sp, 8 + jr ra + nop + .set at + +END(comp_func_Source_dsp_asm_x2) + +LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_mips_dsp_asm_x2) +/* + * a0 - uint *dest + * a1 - const uint *src + * a2 - int length (two pixels are consumed per iteration; presumably must be even - confirm in caller) + * a3 - uint const_alpha + */ + + .set noat + addiu sp, -12 + sw s0, 0(sp) + sw s1, 4(sp) + sw s2, 8(sp) + beqz a2, 2f + nop + replv.ph a3, a3 + li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ + +1: + lw t0, 0(a1) /* t0 = src 1 */ + lw t1, 4(a1) /* t1 = src 2 */ + addiu a2, -2 + + BYTE_MUL_x2 t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0 /* AT = BYTE_MUL(src 1, const_alpha), t7 = BYTE_MUL(src 2, const_alpha) */ + + lw t0, 0(a0) /* t0 = dest 1 */ + lw t1, 4(a0) /* t1 = dest 2 */ + not s1, AT + not s2, t7 + srl s1, s1, 24 /* register s1 = qAlpha(~AT), AT = scaled src 1 */ + srl s2, s2, 24 /* register s2 = qAlpha(~t7), t7 = scaled src 2 */ + replv.ph s1, s1 + replv.ph s2, s2 + + BYTE_MUL_x2 t0, t1, t2, t3, s1, s2, t9, t4, t5, t6, s0 /* t2 = BYTE_MUL(dest 1, s1), t3 = BYTE_MUL(dest 2, s2) */ + + addiu a1, 8 + addu AT, AT, t2 + addu t7, t7, t3 + sw AT, 0(a0) + sw t7, 4(a0) + bnez a2, 1b + addiu a0, 8 + +2: + lw s0, 0(sp) + lw s1, 4(sp) + lw s2, 8(sp) + addiu sp, 12 + jr ra + nop + .set at + +END(qt_blend_argb32_on_argb32_mips_dsp_asm_x2) + +LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm) +/* + * a0 - uint *dest + * a1 - const uint *src + * a2 - int length + */ + + beqz a2, 5f + nop + li t7, 8388736 /* t7 = 0x800080 */ + b 2f + nop +1: + addiu a0, a0, 4 + addiu a2, a2, -1 + beqz a2, 5f + nop +2: + lw 
t0, 0(a1) /* t0 = s = src[i] */ + addiu a1, a1, 4 + nor t1, t0, zero + srl t1, t1, 24 /* t1 = ~qAlpha(s) */ + bnez t1, 3f /* qAlpha(s) != 255: go blend with dst[i] */ + nop + sw t0, 0(a0) /* dst[i] = src[i] */ + addiu a2, a2, -1 + bnez a2, 2b + addiu a0, a0, 4 + b 5f + nop +3: + beqz t0, 1b /* s == 0: leave dst[i] untouched, just advance */ + replv.ph t6, t1 /* | 0 | qAlpha(~s) | 0 | qAlpha(~s) | */ + + lw t4, 0(a0) + addiu a2, a2, -1 + beqz t4, 31f /* dst[i] == 0: skip the multiply, t8 stays 0 */ + move t8, zero + + BYTE_MUL t4, t8, t6, t7, t1, t2, t3, t4 +31: + addu t8, t0, t8 /* dst[i] = + * s + BYTE_MUL(dst[i],~qAlpha(s)) */ + sw t8, 0(a0) + bnez a2, 2b + addiu a0, a0, 4 + b 5f + nop +5: + jr ra + nop + +END(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm) diff --git a/src/gui/painting/qdrawhelper_mips_dsp_p.h b/src/gui/painting/qdrawhelper_mips_dsp_p.h index ed848485fc..818b27ce21 100644 --- a/src/gui/painting/qdrawhelper_mips_dsp_p.h +++ b/src/gui/painting/qdrawhelper_mips_dsp_p.h @@ -52,6 +52,48 @@ extern "C" void qt_memfill32_asm_mips_dsp(quint32 *dest, quint32 value, int coun extern "C" void comp_func_SourceOver_asm_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha); +extern "C" void comp_func_solid_DestinationOver_dsp_asm_x2(uint *dest, int length, uint color); + +extern "C" void comp_func_solid_Source_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha); + +extern "C" void comp_func_DestinationOver_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha); + +extern "C" void comp_func_solid_SourceIn_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha); + +extern "C" void comp_func_SourceIn_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha); + +extern "C" void comp_func_solid_DestinationIn_dsp_asm_x2(uint *dest, int length, uint a); + +extern "C" void comp_func_DestinationIn_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha); + +extern "C" void comp_func_DestinationOut_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha); + +extern "C" void comp_func_solid_SourceAtop_dsp_asm_x2(uint 
*dest, int length, uint color, uint const_alpha); + +extern "C" void comp_func_SourceAtop_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha); + +extern "C" void comp_func_solid_DestinationAtop_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha); + +extern "C" void comp_func_DestinationAtop_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha); + +extern "C" void comp_func_solid_XOR_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha); /* NOTE(review): the assembly header comment documents the 4th argument (a3) as sia, not const_alpha - confirm and align the name */ + +extern "C" void comp_func_XOR_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha); + +extern "C" void comp_func_solid_SourceOut_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha); + +extern "C" void comp_func_SourceOut_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha); + +extern "C" void comp_func_Source_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha); + +extern "C" void qt_blend_argb32_on_argb32_mips_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha); + +extern "C" void qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm(uint *dest, const uint *src, int length); + +extern "C" uint * destfetchARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length); + +extern "C" uint * qt_destStoreARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length); + void qt_blend_argb32_on_argb32_mips_dsp(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, @@ -71,6 +113,46 @@ uint * QT_FASTCALL qt_destFetchARGB32_mips_dsp(uint *buffer, void QT_FASTCALL qt_destStoreARGB32_mips_dsp(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length); +void QT_FASTCALL comp_func_solid_Source_mips_dsp(uint *dest, int length, uint color, uint const_alpha); + +void QT_FASTCALL comp_func_solid_SourceOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha); + +void QT_FASTCALL comp_func_solid_DestinationOver_mips_dsp(uint *dest, int length, uint color, uint 
const_alpha); + +void QT_FASTCALL comp_func_solid_SourceOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha); /* NOTE(review): redundant - identical declaration already appears two entries above */ + +void QT_FASTCALL comp_func_solid_DestinationOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha); /* NOTE(review): redundant - identical declaration already appears two entries above */ + +void QT_FASTCALL comp_func_DestinationOver_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha); + +void QT_FASTCALL comp_func_solid_SourceIn_mips_dsp(uint *dest, int length, uint color, uint const_alpha); + +void QT_FASTCALL comp_func_SourceIn_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha); + +void QT_FASTCALL comp_func_solid_DestinationIn_mips_dsp(uint *dest, int length, uint color, uint const_alpha); + +void QT_FASTCALL comp_func_DestinationIn_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha); + +void QT_FASTCALL comp_func_solid_DestinationOut_mips_dsp(uint *dest, int length, uint color, uint const_alpha); + +void QT_FASTCALL comp_func_DestinationOut_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha); + +void QT_FASTCALL comp_func_solid_SourceAtop_mips_dsp(uint *dest, int length, uint color, uint const_alpha); + +void QT_FASTCALL comp_func_SourceAtop_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha); + +void QT_FASTCALL comp_func_solid_DestinationAtop_mips_dsp(uint *dest, int length, uint color, uint const_alpha); + +void QT_FASTCALL comp_func_DestinationAtop_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha); + +void QT_FASTCALL comp_func_solid_XOR_mips_dsp(uint *dest, int length, uint color, uint const_alpha); + +void QT_FASTCALL comp_func_solid_SourceOut_mips_dsp(uint *dest, int length, uint color, uint const_alpha); + +void QT_FASTCALL comp_func_SourceOut_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha); + +void QT_FASTCALL comp_func_XOR_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha); + #endif // QT_COMPILER_SUPPORTS_MIPS_DSP diff --git 
a/src/gui/painting/qdrawhelper_mips_dspr2_asm.S b/src/gui/painting/qdrawhelper_mips_dspr2_asm.S index 213fcf8a08..7e95410a7f 100644 --- a/src/gui/painting/qdrawhelper_mips_dspr2_asm.S +++ b/src/gui/painting/qdrawhelper_mips_dspr2_asm.S @@ -41,59 +41,6 @@ #include "qt_mips_asm_dsp.h" -LEAF_MIPS_DSPR2(INTERPOLATE_PIXEL_255_asm_mips_dspr2) -/* - * a0 - uint x (First value to multiply) - * a1 - uint a (Multiplicator byte for first value) - * a2 - uint y (Second value to multiply) - * a3 - uint b (Multiplicator byte for second value) - */ - - .set reorder - replv.ph a1, a1 - replv.ph a3, a3 - li t8, 8388736 - muleu_s.ph.qbl t0, a0, a1 - muleu_s.ph.qbl t1, a2, a3 - muleu_s.ph.qbr t2, a0, a1 - muleu_s.ph.qbr t3, a2, a3 - addu.ph t4, t0, t1 - addu.ph t5, t2, t3 - preceu.ph.qbla t0, t4 - addu t1, t0, t8 - addu t1, t4, t1 - preceu.ph.qbla t6, t5 - addu t7, t6, t8 - addu t7, t5, t7 - precrq.qb.ph t2, t1, t7 - move v0, t2 - j ra - -END(INTERPOLATE_PIXEL_255_asm_mips_dspr2) - -LEAF_MIPS_DSPR2(BYTE_MUL_asm_mips_dspr2) -/* - * a0 - uint x (Value to multiply) - * a1 - uint a (Multiplicator byte) - */ - - .set reorder - replv.ph a1, a1 /* a1 = 0x00a00a */ - li t4, 8388736 /* t4 = 0x800080 */ - muleu_s.ph.qbl t0, a0, a1 - muleu_s.ph.qbr t2, a0, a1 - preceu.ph.qbla t1, t0 - addu t0, t0, t1 - addu t0, t0, t4 - preceu.ph.qbla t3, t2 - addu t2, t2, t3 - addu t2, t2, t4 - precrq.qb.ph t4, t0, t2 - move v0, t4 - j ra - -END(BYTE_MUL_asm_mips_dspr2) - LEAF_MIPS_DSPR2(qConvertRgb16To32_asm_mips_dspr2) /* * a0 - dst (a8r8g8b8) diff --git a/src/gui/painting/qt_mips_asm_dsp.h b/src/gui/painting/qt_mips_asm_dsp.h index bcde7068a2..088831f6e4 100644 --- a/src/gui/painting/qt_mips_asm_dsp.h +++ b/src/gui/painting/qt_mips_asm_dsp.h @@ -110,4 +110,140 @@ LEAF_MIPS32R2(symbol) \ .end function; \ .size function,.-function +/* + * BYTE_MUL operation on two pixels (in_1 and in_2) with two + * multiplicator bytes, repl_a1 and repl_a2, which should be + * prepared with: + * replv.ph repl_a1, a1 + * 
replv.ph repl_a2, a2 + * to become: + * repl_a1 = | 00 | a1 | 00 | a1 | + * repl_a2 = | 00 | a2 | 00 | a2 | + * + * rounding_factor must have the following value: + * li rounding_factor, 0x00800080 + * + * scratch(n) - temporary registers + * + * in_const: 1 -> (default) in_1 and in_2 remain unchanged + * after usage; repl_a1 and repl_a2 are overwritten + * 0 -> (or anything different than 1) repl_a1 and + * repl_a2 remain unchanged after usage; in_1 and + * in_2 are overwritten + */ +.macro BYTE_MUL_x2 in_1, in_2, out_1, out_2 \ + repl_a1, repl_a2, rounding_factor, \ + scratch1, scratch2, scratch3, scratch4, \ + in_const = 1 + muleu_s.ph.qbl \scratch1, \in_1, \repl_a1 + muleu_s.ph.qbr \scratch2, \in_1, \repl_a1 + muleu_s.ph.qbl \scratch3, \in_2, \repl_a2 + muleu_s.ph.qbr \scratch4, \in_2, \repl_a2 + +.if \in_const == 1 + preceu.ph.qbla \repl_a1, \scratch1 + preceu.ph.qbla \repl_a2, \scratch2 + preceu.ph.qbla \out_1, \scratch3 + preceu.ph.qbla \out_2, \scratch4 + + addu \scratch1, \repl_a1, \scratch1 + addu \scratch2, \repl_a2, \scratch2 +.else + preceu.ph.qbla \in_1, \scratch1 + preceu.ph.qbla \in_2, \scratch2 + preceu.ph.qbla \out_1, \scratch3 + preceu.ph.qbla \out_2, \scratch4 + + addu \scratch1, \in_1, \scratch1 + addu \scratch2, \in_2, \scratch2 +.endif + + addu \out_1, \out_1, \scratch3 + addu \out_2, \out_2, \scratch4 + + addu \scratch1, \scratch1, \rounding_factor + addu \scratch2, \scratch2, \rounding_factor + addu \scratch3, \out_1, \rounding_factor + addu \scratch4, \out_2, \rounding_factor + + precrq.qb.ph \out_1, \scratch1, \scratch2 + precrq.qb.ph \out_2, \scratch3, \scratch4 + +.endm + +/* + * BYTE_MUL operation on one pixel (in_1) with + * multiplicator byte, repl_a1, which should be + * prepared with: + * replv.ph repl_a1, a1 + * to become: + * repl_a1 = | 00 | a1 | 00 | a1 | + * + * rounding_factor must have the following value: + * li rounding_factor, 0x00800080 + * + * scratch(n) - temporary registers + */ +.macro BYTE_MUL in_1, out_1, \ + repl_a1, 
rounding_factor, \ + scratch1, scratch2, scratch3, scratch4 + muleu_s.ph.qbl \scratch1, \in_1, \repl_a1 + muleu_s.ph.qbr \scratch2, \in_1, \repl_a1 + + preceu.ph.qbla \scratch3, \scratch1 + preceu.ph.qbla \scratch4, \scratch2 + + addu \scratch1, \scratch1, \scratch3 + addu \scratch1, \scratch1, \rounding_factor + + addu \scratch2, \scratch2, \scratch4 + addu \scratch2, \scratch2, \rounding_factor + + precrq.qb.ph \out_1, \scratch1, \scratch2 + +.endm + +/* + * macro for INTERPOLATE_PIXEL_255 operation + * in_1 - First value to multiply + * mul_1 - Multiplicator byte for first value (plain byte, fed to mul; not replv.ph-replicated) + * in_2 - Second value to multiply + * mul_2 - Multiplicator byte for second value (plain byte, fed to mul; not replv.ph-replicated) + * rounding_factor and andi_factor should be prepared + * as: + * li rounding_factor, 0x00800080 + * li andi_factor, 0xff00ff00 + * out_1 - receives the result; scratch(n) - temporary registers + */ +.macro INTERPOLATE_PIXEL_255 in_1, mul_1, \ + in_2, mul_2, \ + out_1, \ + rounding_factor, andi_factor \ + scratch1, scratch2, scratch3, scratch4 +# x part + preceu.ph.qbra \scratch1, \in_1 + preceu.ph.qbra \scratch2, \in_2 + mul \scratch1, \scratch1, \mul_1 + mul \scratch2, \scratch2, \mul_2 +# x>>8 part + preceu.ph.qbla \scratch3, \in_1 + preceu.ph.qbla \scratch4, \in_2 + mul \scratch3, \scratch3, \mul_1 + mul \scratch4, \scratch4, \mul_2 +# x part + addu \scratch1, \scratch1, \scratch2 + preceu.ph.qbla \scratch2, \scratch1 + addu \scratch1, \scratch1, \scratch2 + addu \scratch1, \scratch1, \rounding_factor + preceu.ph.qbla \scratch1, \scratch1 +# x>>8 part + addu \scratch3, \scratch3, \scratch4 + preceu.ph.qbla \scratch4, \scratch3 + addu \scratch3, \scratch3, \scratch4 + addu \scratch3, \scratch3, \rounding_factor + and \scratch3, \scratch3, \andi_factor + + or \out_1, \scratch1, \scratch3 +.endm + #endif //QT_MIPS_DSP_H__ |