summaryrefslogtreecommitdiffstats
path: root/src/gui/painting
diff options
context:
space:
mode:
authorDamir Tatalovic <dtatalovic@mips.com>2012-06-22 18:13:02 +0200
committerQt by Nokia <qt-info@nokia.com>2012-07-03 01:55:44 +0200
commit614422c2e7396b27a9802b3a0d949dfd93ba688b (patch)
tree00ba3115c2d27f5bf5a95ea629e6618042624eaf /src/gui/painting
parent60829b4a5692ebdd68b12830d7c722ccd31921ad (diff)
MIPS DSP composition functions optimizations.
List of optimized routines: - comp_func_DestinationOver - comp_func_SourceIn - comp_func_DestinationIn - comp_func_DestinationOut - comp_func_SourceAtop - comp_func_DestinationAtop - comp_func_XOR - comp_func_SourceOut - comp_func_solid_SourceOver - comp_func_solid_DestinationOver - comp_func_solid_SourceIn - comp_func_solid_DestinationIn - comp_func_solid_SourceAtop - comp_func_solid_DestinationAtop - comp_func_solid_XOR - comp_func_solid_SourceOut Previously optimized routines qt_blend_argb32_on_argb32_mips_dsp and comp_func_Source_mips_dsp are redesigned and rewritten. Overall improvement by running tst_bench_blendbench benchmark app from tests/benchmarks/gui/image/blendbench/ is 27%. Change-Id: I6ab09b17cac10f4aded59787074ab4c89e72ccac Reviewed-by: Thiago Macieira <thiago.macieira@intel.com> Reviewed-by: Samuel Rødal <samuel.rodal@nokia.com>
Diffstat (limited to 'src/gui/painting')
-rw-r--r--src/gui/painting/qdrawhelper.cpp17
-rw-r--r--src/gui/painting/qdrawhelper_mips_dsp.cpp338
-rw-r--r--src/gui/painting/qdrawhelper_mips_dsp_asm.S1303
-rw-r--r--src/gui/painting/qdrawhelper_mips_dsp_p.h82
-rw-r--r--src/gui/painting/qdrawhelper_mips_dspr2_asm.S53
-rw-r--r--src/gui/painting/qt_mips_asm_dsp.h136
6 files changed, 1770 insertions, 159 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 08975daf71..985ef68401 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -5971,6 +5971,23 @@ void qInitDrawhelperAsm()
#if defined(QT_COMPILER_SUPPORTS_MIPS_DSP)
functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_asm_mips_dsp;
functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_mips_dsp;
+ functionForMode_C[QPainter::CompositionMode_DestinationOver] = comp_func_DestinationOver_mips_dsp;
+ functionForMode_C[QPainter::CompositionMode_SourceIn] = comp_func_SourceIn_mips_dsp;
+ functionForMode_C[QPainter::CompositionMode_DestinationIn] = comp_func_DestinationIn_mips_dsp;
+ functionForMode_C[QPainter::CompositionMode_DestinationOut] = comp_func_DestinationOut_mips_dsp;
+ functionForMode_C[QPainter::CompositionMode_SourceAtop] = comp_func_SourceAtop_mips_dsp;
+ functionForMode_C[QPainter::CompositionMode_DestinationAtop] = comp_func_DestinationAtop_mips_dsp;
+ functionForMode_C[QPainter::CompositionMode_Xor] = comp_func_XOR_mips_dsp;
+ functionForMode_C[QPainter::CompositionMode_SourceOut] = comp_func_SourceOut_mips_dsp;
+
+ functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_mips_dsp;
+ functionForModeSolid_C[QPainter::CompositionMode_DestinationOver] = comp_func_solid_DestinationOver_mips_dsp;
+ functionForModeSolid_C[QPainter::CompositionMode_SourceIn] = comp_func_solid_SourceIn_mips_dsp;
+ functionForModeSolid_C[QPainter::CompositionMode_DestinationIn] = comp_func_solid_DestinationIn_mips_dsp;
+ functionForModeSolid_C[QPainter::CompositionMode_SourceAtop] = comp_func_solid_SourceAtop_mips_dsp;
+ functionForModeSolid_C[QPainter::CompositionMode_DestinationAtop] = comp_func_solid_DestinationAtop_mips_dsp;
+ functionForModeSolid_C[QPainter::CompositionMode_Xor] = comp_func_solid_XOR_mips_dsp;
+ functionForModeSolid_C[QPainter::CompositionMode_SourceOut] = comp_func_solid_SourceOut_mips_dsp;
qt_memfill32 = qt_memfill32_asm_mips_dsp;
diff --git a/src/gui/painting/qdrawhelper_mips_dsp.cpp b/src/gui/painting/qdrawhelper_mips_dsp.cpp
index b33329c090..ec1d7d2a2a 100644
--- a/src/gui/painting/qdrawhelper_mips_dsp.cpp
+++ b/src/gui/painting/qdrawhelper_mips_dsp.cpp
@@ -45,24 +45,6 @@
QT_BEGIN_NAMESPACE
-#if defined(QT_COMPILER_SUPPORTS_MIPS_DSP)
-
-extern "C" uint INTERPOLATE_PIXEL_255_asm_mips_dsp(uint x, uint a, uint y, uint b);
-
-extern "C" uint BYTE_MUL_asm_mips_dsp(uint x, uint a);
-
-extern "C" uint * destfetchARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);
-
-extern "C" uint * qt_destStoreARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);
-
-#if defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
-
-extern "C" uint INTERPOLATE_PIXEL_255_asm_mips_dspr2(uint x, uint a, uint y, uint b);
-
-extern "C" uint BYTE_MUL_asm_mips_dspr2(uint x, uint a);
-
-#endif // QT_COMPILER_SUPPORTS_MIPS_DSPR2
-
void qt_blend_argb32_on_argb32_mips_dsp(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
@@ -80,32 +62,21 @@ void qt_blend_argb32_on_argb32_mips_dsp(uchar *destPixels, int dbpl,
uint *dst = (uint *) destPixels;
if (const_alpha == 256) {
for (int y=0; y<h; ++y) {
- for (int x=0; x<w; ++x) {
- uint s = src[x];
- if (s >= 0xff000000)
- dst[x] = s;
- else if (s != 0)
-#if !defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
- dst[x] = s + BYTE_MUL_asm_mips_dsp(dst[x], qAlpha(~s));
-#else
- dst[x] = s + BYTE_MUL_asm_mips_dspr2(dst[x], qAlpha(~s));
-#endif
- }
+ qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm(dst, src, w);
dst = (quint32 *)(((uchar *) dst) + dbpl);
src = (const quint32 *)(((const uchar *) src) + sbpl);
}
} else if (const_alpha != 0) {
const_alpha = (const_alpha * 255) >> 8;
for (int y=0; y<h; ++y) {
- for (int x=0; x<w; ++x) {
-#if !defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
- uint s = BYTE_MUL_asm_mips_dsp(src[x], const_alpha);
- dst[x] = s + BYTE_MUL_asm_mips_dsp(dst[x], qAlpha(~s));
-#else
- uint s = BYTE_MUL_asm_mips_dspr2(src[x], const_alpha);
- dst[x] = s + BYTE_MUL_asm_mips_dspr2(dst[x], qAlpha(~s));
-#endif
+            // Per-row split: one leading pixel in C++ when w is odd, the remaining pixel pairs in asm. The row geometry (w, h, dst, src) is never modified here.
+            const int odd = (w%2 > 0) ? 1 : 0;
+            if (odd) {
+                uint s = BYTE_MUL(src[0], const_alpha); // source pixel scaled by const_alpha
+                dst[0] = s + BYTE_MUL(dst[0], qAlpha(~s)); // same formula the removed per-pixel loop used
             }
+            if (w > odd) // hand over only when at least one full pixel pair remains in this row
+                qt_blend_argb32_on_argb32_mips_dsp_asm_x2(dst + odd, src + odd, w - odd, const_alpha);
dst = (quint32 *)(((uchar *) dst) + dbpl);
src = (const quint32 *)(((const uchar *) src) + sbpl);
}
@@ -145,13 +116,13 @@ void comp_func_Source_mips_dsp(uint *dest, const uint *src, int length, uint con
::memcpy(dest, src, length * sizeof(uint));
} else {
int ialpha = 255 - const_alpha;
- for (int i = 0; i < length; ++i) {
-#if !defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
- dest[i] = INTERPOLATE_PIXEL_255_asm_mips_dsp(src[i], const_alpha, dest[i], ialpha);
-#else
- dest[i] = INTERPOLATE_PIXEL_255_asm_mips_dspr2(src[i], const_alpha, dest[i], ialpha);
-#endif
+ if (length%2 > 0) {
+ dest[0] = INTERPOLATE_PIXEL_255(src[0], const_alpha, dest[0], ialpha);
+ length--;
+ dest++;
+ src++;
}
+ comp_func_Source_dsp_asm_x2(dest, src, length, const_alpha);
}
}
@@ -171,6 +142,285 @@ void QT_FASTCALL qt_destStoreARGB32_mips_dsp(QRasterBuffer *rasterBuffer, int x,
qt_destStoreARGB32_asm_mips_dsp(data, buffer, length);
}
-#endif // QT_COMPILER_SUPPORTS_MIPS_DSP
+void QT_FASTCALL comp_func_solid_SourceOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
+{ // Solid SourceOver over length pixels: dest[i] = color + BYTE_MUL(dest[i], qAlpha(~color)).
+ if (const_alpha != 255)
+ color = BYTE_MUL(color, const_alpha); // scale the colour once, before any per-pixel work
+ if (length%2 > 0) { // odd length: do one pixel here so the pairwise asm routine receives an even count
+ dest[0] = color + BYTE_MUL(dest[0], qAlpha(~color));
+ length--;
+ dest++;
+ }
+ comp_func_solid_Source_dsp_asm_x2(dest, length, color, qAlpha(~color)); // remaining pixels; last argument is the inverse colour alpha
+}
+
+void QT_FASTCALL comp_func_solid_DestinationOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
+{ // Solid DestinationOver over length pixels: dest[i] = dest[i] + BYTE_MUL(color, qAlpha(~dest[i])).
+ if (const_alpha != 255)
+ color = BYTE_MUL(color, const_alpha); // fold const_alpha into the colour up front
+ if (length%2 > 0) { // odd length: do one pixel here so the pairwise asm routine receives an even count
+ uint d = dest[0];
+ dest[0] = d + BYTE_MUL(color, qAlpha(~d)); // colour weighted by the inverse destination alpha
+ length--;
+ dest++;
+ }
+ comp_func_solid_DestinationOver_dsp_asm_x2(dest, length, color); // colour is already scaled, so no alpha argument is passed
+}
+
+void QT_FASTCALL comp_func_DestinationOver_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
+{ // DestinationOver: dest[i] += BYTE_MUL(s, qAlpha(~dest[i])), where s is src[i], scaled by const_alpha when that is not 255.
+ if (length%2 > 0) { // odd length: blend one pixel here so the pairwise asm routine receives an even count
+ if (const_alpha == 255) {
+ uint d = dest[0];
+ dest[0] = d + BYTE_MUL(src[0], qAlpha(~d)); // full opacity: source used unscaled
+ } else {
+ uint d = dest[0];
+ uint s = BYTE_MUL(src[0], const_alpha); // apply const_alpha to the source first
+ dest[0] = d + BYTE_MUL(s, qAlpha(~d));
+ }
+ length--;
+ dest++;
+ src++;
+ }
+ comp_func_DestinationOver_dsp_asm_x2(dest, src, length, const_alpha); // the asm routine branches on const_alpha == 255 itself
+}
+
+void QT_FASTCALL comp_func_solid_SourceIn_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
+{ // Solid SourceIn: the colour weighted by the destination alpha; for const_alpha != 255 dest weighted by (255 - const_alpha) is mixed back in.
+ if (length%2 > 0) { // odd length: do one pixel here so the pairwise asm routine receives an even count
+ if (const_alpha == 255) {
+ dest[0] = BYTE_MUL(color, qAlpha(dest[0]));
+ } else {
+ uint tmp_color = BYTE_MUL(color, const_alpha); // local copy: the unscaled colour is still what the asm call below receives
+ uint cia = 255 - const_alpha; // inverse of const_alpha, the weight of the old destination
+ uint d = dest[0];
+ dest[0] = INTERPOLATE_PIXEL_255(tmp_color, qAlpha(d), d, cia); // tmp_color weighted by qAlpha(d) plus d weighted by cia
+ }
+ length--;
+ dest++;
+ }
+ comp_func_solid_SourceIn_dsp_asm_x2(dest, length, color, const_alpha); // asm scales the colour by const_alpha itself on its non-255 path
+}
+
+void QT_FASTCALL comp_func_SourceIn_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
+{ // SourceIn: src weighted by the destination alpha; for const_alpha != 255 dest weighted by (255 - const_alpha) is mixed back in.
+ if (length%2 > 0) { // odd length: blend one pixel here so the pairwise asm routine receives an even count
+ if (const_alpha == 255) {
+ dest[0] = BYTE_MUL(src[0], qAlpha(dest[0]));
+ } else {
+ uint cia = 255 - const_alpha; // weight of the old destination
+ uint d = dest[0];
+ uint s = BYTE_MUL(src[0], const_alpha); // source scaled by const_alpha
+ dest[0] = INTERPOLATE_PIXEL_255(s, qAlpha(d), d, cia); // s weighted by qAlpha(d) plus d weighted by cia
+ }
+ length--;
+ dest++;
+ src++;
+ }
+ comp_func_SourceIn_dsp_asm_x2(dest, src, length, const_alpha); // remaining pixels, two per asm iteration
+}
+
+void QT_FASTCALL comp_func_solid_DestinationIn_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
+{ // Solid DestinationIn: every dest pixel is multiplied by one factor a derived from the colour alpha.
+ uint a = qAlpha(color);
+ if (const_alpha != 255) {
+ a = BYTE_MUL(a, const_alpha) + 255 - const_alpha; // colour alpha scaled by const_alpha, plus the inverse const_alpha
+ }
+ if (length%2 > 0) { // odd length: do one pixel here so the pairwise asm routine receives an even count
+ dest[0] = BYTE_MUL(dest[0], a);
+ length--;
+ dest++;
+ }
+ comp_func_solid_DestinationIn_dsp_asm_x2(dest, length, a); // asm only needs the final factor, not colour or const_alpha
+}
+
+void QT_FASTCALL comp_func_DestinationIn_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
+{ // DestinationIn: dest[i] is multiplied by a factor derived from the alpha of src[i].
+ if (length%2 > 0) { // odd length: blend one pixel here so the pairwise asm routine receives an even count
+ if (const_alpha == 255) {
+ dest[0] = BYTE_MUL(dest[0], qAlpha(src[0]));
+ } else {
+ int cia = 255 - const_alpha; // inverse of const_alpha
+ uint a = BYTE_MUL(qAlpha(src[0]), const_alpha) + cia; // source alpha scaled by const_alpha, plus cia
+ dest[0] = BYTE_MUL(dest[0], a);
+ }
+ length--;
+ src++;
+ dest++;
+ }
+ comp_func_DestinationIn_dsp_asm_x2(dest, src, length, const_alpha); // remaining pixels, two per asm iteration
+}
+
+void QT_FASTCALL comp_func_solid_DestinationOut_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
+{ // Solid DestinationOut: every dest pixel is multiplied by one factor a derived from the inverse colour alpha.
+ uint a = qAlpha(~color); // inverse alpha of the colour
+ if (const_alpha != 255) {
+ a = BYTE_MUL(a, const_alpha) + 255 - const_alpha; // inverse alpha scaled by const_alpha, plus the inverse const_alpha
+ }
+ if (length%2 > 0) { // odd length: do one pixel here so the pairwise asm routine receives an even count
+ dest[0] = BYTE_MUL(dest[0], a);
+ length--;
+ dest++;
+ }
+ comp_func_solid_DestinationIn_dsp_asm_x2(dest, length, a); // deliberately the DestinationIn routine: it just multiplies dest by the given factor
+}
+
+void QT_FASTCALL comp_func_DestinationOut_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
+{ // DestinationOut: dest[i] is multiplied by a factor derived from the inverse alpha of src[i].
+ if (length%2 > 0) { // odd length: blend one pixel here so the pairwise asm routine receives an even count
+ if (const_alpha == 255) {
+ dest[0] = BYTE_MUL(dest[0], qAlpha(~src[0]));
+ } else {
+ int cia = 255 - const_alpha; // inverse of const_alpha
+ uint sia = BYTE_MUL(qAlpha(~src[0]), const_alpha) + cia; // inverse source alpha scaled by const_alpha, plus cia
+ dest[0] = BYTE_MUL(dest[0], sia);
+ }
+ length--;
+ dest++;
+ src++;
+ }
+ comp_func_DestinationOut_dsp_asm_x2(dest, src, length, const_alpha); // remaining pixels, two per asm iteration
+}
+
+void QT_FASTCALL comp_func_solid_SourceAtop_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
+{ // Solid SourceAtop: colour weighted by qAlpha(dest[i]) plus dest[i] weighted by the inverse colour alpha.
+ if (const_alpha != 255) {
+ color = BYTE_MUL(color, const_alpha); // scale the colour once, before any per-pixel work
+ }
+ uint sia = qAlpha(~color); // inverse alpha of the (possibly scaled) colour
+ if (length%2 > 0) { // odd length: do one pixel here so the pairwise asm routine receives an even count
+ dest[0] = INTERPOLATE_PIXEL_255(color, qAlpha(dest[0]), dest[0], sia);
+ length--;
+ dest++;
+ }
+ comp_func_solid_SourceAtop_dsp_asm_x2(dest, length, color, sia); // asm receives the pre-scaled colour and its inverse alpha
+}
+
+void QT_FASTCALL comp_func_SourceAtop_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
+{ // SourceAtop: s weighted by qAlpha(d) plus d weighted by qAlpha(~s), with s = src[i] scaled by const_alpha when that is not 255.
+ if (length%2 > 0) { // odd length: blend one pixel here so the pairwise asm routine receives an even count
+ if (const_alpha == 255) {
+ uint s = src[0];
+ uint d = dest[0];
+ dest[0] = INTERPOLATE_PIXEL_255(s, qAlpha(d), d, qAlpha(~s));
+ } else {
+ uint s = BYTE_MUL(src[0], const_alpha); // the only difference from the branch above: source is scaled first
+ uint d = dest[0];
+ dest[0] = INTERPOLATE_PIXEL_255(s, qAlpha(d), d, qAlpha(~s));
+ }
+ length--;
+ dest++;
+ src++;
+ }
+ comp_func_SourceAtop_dsp_asm_x2(dest, src, length, const_alpha); // remaining pixels, two per asm iteration
+}
+
+
+void QT_FASTCALL comp_func_solid_DestinationAtop_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
+{ // Solid DestinationAtop: dest[i] weighted by a plus colour weighted by qAlpha(~dest[i]).
+ uint a = qAlpha(color);
+ if (const_alpha != 255) {
+ color = BYTE_MUL(color, const_alpha);
+ a = qAlpha(color) + 255 - const_alpha; // alpha of the scaled colour plus the inverse const_alpha
+ }
+ if (length%2 > 0) { // odd length: do one pixel here so the pairwise asm routine receives an even count
+ uint d = dest[0];
+ dest[0] = INTERPOLATE_PIXEL_255(d, a, color, qAlpha(~d));
+ length--;
+ dest++;
+ }
+ comp_func_solid_DestinationAtop_dsp_asm_x2(dest, length, color, a); // asm receives the pre-scaled colour and the factor a
+}
+
+void QT_FASTCALL comp_func_DestinationAtop_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
+{ // DestinationAtop: d weighted by a source-alpha based factor plus s weighted by qAlpha(~d).
+ if (length%2 > 0) { // odd length: blend one pixel here so the pairwise asm routine receives an even count
+ if (const_alpha == 255) {
+ uint s = src[0];
+ uint d = dest[0];
+ dest[0] = INTERPOLATE_PIXEL_255(d, qAlpha(s), s, qAlpha(~d));
+ } else {
+ int cia = 255 - const_alpha; // inverse of const_alpha
+ uint s = BYTE_MUL(src[0], const_alpha); // source scaled by const_alpha
+ uint d = dest[0];
+ uint a = qAlpha(s) + cia; // alpha of the scaled source plus cia
+ dest[0] = INTERPOLATE_PIXEL_255(d, a, s, qAlpha(~d));
+ }
+ length--;
+ dest++;
+ src++;
+ }
+ comp_func_DestinationAtop_dsp_asm_x2(dest, src, length, const_alpha); // remaining pixels, two per asm iteration
+}
+
+void QT_FASTCALL comp_func_solid_XOR_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
+{ // Solid XOR: colour weighted by qAlpha(~dest[i]) plus dest[i] weighted by the inverse colour alpha.
+ if (const_alpha != 255)
+ color = BYTE_MUL(color, const_alpha); // scale the colour once, before any per-pixel work
+ uint sia = qAlpha(~color); // inverse alpha of the (possibly scaled) colour
+
+ if (length%2 > 0) { // odd length: do one pixel here so the pairwise asm routine receives an even count
+ uint d = dest[0];
+ dest[0] = INTERPOLATE_PIXEL_255(color, qAlpha(~d), d, sia);
+ length--;
+ dest++;
+ }
+ comp_func_solid_XOR_dsp_asm_x2(dest, length, color, sia); // asm receives the pre-scaled colour and its inverse alpha
+}
+
+void QT_FASTCALL comp_func_XOR_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
+{ // XOR: s weighted by qAlpha(~d) plus d weighted by qAlpha(~s), with s = src[i] scaled by const_alpha when that is not 255.
+ if (length%2 > 0) { // odd length: blend one pixel here so the pairwise asm routine receives an even count
+ if (const_alpha == 255) {
+ uint d = dest[0];
+ uint s = src[0];
+ dest[0] = INTERPOLATE_PIXEL_255(s, qAlpha(~d), d, qAlpha(~s));
+ } else {
+ uint d = dest[0];
+ uint s = BYTE_MUL(src[0], const_alpha); // the only difference from the branch above: source is scaled first
+ dest[0] = INTERPOLATE_PIXEL_255(s, qAlpha(~d), d, qAlpha(~s));
+ }
+ length--;
+ dest++;
+ src++;
+ }
+ comp_func_XOR_dsp_asm_x2(dest, src, length, const_alpha); // remaining pixels, two per asm iteration
+}
+
+void QT_FASTCALL comp_func_solid_SourceOut_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
+{ // Solid SourceOut: colour weighted by qAlpha(~dest[i]); for const_alpha != 255 dest weighted by (255 - const_alpha) is mixed back in.
+ if (length%2 > 0) { // odd length: do one pixel here; the _x2 routine presumably works on pixel pairs like its siblings (not shown here)
+ if (const_alpha == 255) {
+ dest[0] = BYTE_MUL(color, qAlpha(~dest[0]));
+ } else {
+ uint tmp_color = BYTE_MUL(color, const_alpha); // local copy: the unscaled colour is still what the asm call below receives
+ int cia = 255 - const_alpha; // weight of the old destination
+ uint d = dest[0];
+ dest[0] = INTERPOLATE_PIXEL_255(tmp_color, qAlpha(~d), d, cia);
+ }
+ length--;
+ dest++;
+ }
+ comp_func_solid_SourceOut_dsp_asm_x2(dest, length, color, const_alpha); // unscaled colour plus const_alpha are handed to the asm routine
+}
+
+void QT_FASTCALL comp_func_SourceOut_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
+{ // SourceOut: src weighted by qAlpha(~dest[i]); for const_alpha != 255 dest weighted by (255 - const_alpha) is mixed back in.
+ if (length%2 > 0) { // odd length: blend one pixel here; the _x2 routine presumably works on pixel pairs like its siblings (not shown here)
+ if (const_alpha == 255) {
+ dest[0] = BYTE_MUL(src[0], qAlpha(~dest[0]));
+ } else {
+ int cia = 255 - const_alpha; // weight of the old destination
+ uint s = BYTE_MUL(src[0], const_alpha); // source scaled by const_alpha
+ uint d = dest[0];
+ dest[0] = INTERPOLATE_PIXEL_255(s, qAlpha(~d), d, cia);
+ }
+ length--;
+ dest++;
+ src++;
+ }
+ comp_func_SourceOut_dsp_asm_x2(dest, src, length, const_alpha); // remaining pixels handled by the asm routine
+}
+
QT_END_NAMESPACE
diff --git a/src/gui/painting/qdrawhelper_mips_dsp_asm.S b/src/gui/painting/qdrawhelper_mips_dsp_asm.S
index f426905aad..58cc176529 100644
--- a/src/gui/painting/qdrawhelper_mips_dsp_asm.S
+++ b/src/gui/painting/qdrawhelper_mips_dsp_asm.S
@@ -41,67 +41,6 @@
#include "qt_mips_asm_dsp.h"
-LEAF_MIPS_DSP(INTERPOLATE_PIXEL_255_asm_mips_dsp)
-/*
- * a0 - uint x (First value to multiply)
- * a1 - uint a (Multiplicator byte for first value)
- * a2 - uint y (Second value to multiply)
- * a3 - uint b (Multiplicator byte for second value)
- */
-
- .set reorder
- li t4, 8388736
- preceu.ph.qbra t0, a0 /* (x & 0xff00ff) */
- mul t0, t0, a1 /* (x & 0xff00ff) * a */
- preceu.ph.qbra t1, a2 /* (y & 0xff00ff) */
- mul t1, t1, a3 /* (y & 0xff00ff) * b */
- addu t0, t0, t1 /* (x & 0xff00ff) * a +
- * (y & 0xff00ff) * b
- */
- preceu.ph.qbla t1, t0 /* (t >> 8) & 0xff00ff */
- addu t0, t0, t1 /* t + ((t >> 8) & 0xff00ff */
- addu t0, t0, t4 /* t + ((t >> 8) & 0xff00ff) + 0x800080 */
- preceu.ph.qbla t0, t0 /* t >> 8 and t&=0xff00ff */
- preceu.ph.qbla t2, a0 /* (x>>8) & 0xff00ff */
- mul t2, t2, a1 /* ((x>>8) & 0xff00ff) * a */
- preceu.ph.qbla t3, a2 /* ((y>>8) & 0xff00ff) */
- mul t3, t3, a3 /* ((y>>8) & 0xff00ff) * b */
- addu t2, t2, t3 /* ((x>>8) & 0xff00ff) * a +
- * ((y >> 8) & 0xff00ff) * b
- */
- preceu.ph.qbla t3, t2 /* (x>>8) & 0xff00ff */
- addu t2, t2, t3 /* (x>>8) & 0xff00ff) + 0x800080 */
- addu t2, t2, t4 /* x + ((x>>8) & 0xff00ff) + 0x800080 */
- and t2, t2, 0xff00ff00
- or t1, t0, t2
- move v0, t1
- j ra
-
-END(INTERPOLATE_PIXEL_255_asm_mips_dsp)
-
-LEAF_MIPS_DSP(BYTE_MUL_asm_mips_dsp)
-/*
- * a0 - uint x (Value to multiply)
- * a1 - uint a (Multiplicator byte)
- */
-
- .set reorder
- replv.ph a1, a1 /* a1 = 0x00a00a */
- li t4, 8388736 /* t4 = 0x800080 */
- muleu_s.ph.qbl t0, a0, a1
- muleu_s.ph.qbr t2, a0, a1
- preceu.ph.qbla t1, t0
- addu t0, t0, t1
- addu t0, t0, t4
- preceu.ph.qbla t3, t2
- addu t2, t2, t3
- addu t2, t2, t4
- precrq.qb.ph t4, t0, t2
- move v0, t4
- j ra
-
-END(BYTE_MUL_asm_mips_dsp)
-
LEAF_MIPS_DSP(destfetchARGB32_asm_mips_dsp)
/*
* a0 - buffer address (dst)
@@ -349,7 +288,7 @@ LEAF_MIPS_DSP(comp_func_SourceOver_asm_mips_dsp)
END(comp_func_SourceOver_asm_mips_dsp)
-LEAF_MIPS_DSP(qt_destStoreARGB32_asm_mips_dsp)
+LEAF_MIPS_DSPR2(qt_destStoreARGB32_asm_mips_dsp)
/*
* a0 - uint * data
* a1 - const uint *buffer
@@ -422,3 +361,1243 @@ LEAF_MIPS_DSP(qt_destStoreARGB32_asm_mips_dsp)
nop
END(qt_destStoreARGB32_asm_mips_dsp)
+
+LEAF_MIPS_DSP(comp_func_solid_Source_dsp_asm_x2)
+/*
+ * a0 - uint *dest (loaded and stored back, so it is not const)
+ * a1 - int length (pixel count, reduced by 2 per pass; 0 returns at once)
+ * a2 - uint color (added to each scaled dest pixel)
+ * a3 - uint ialpha (byte multiplier for dest; replicated by replv.ph below)
+ */
+
+ beqz a1, 2f
+ nop
+ replv.ph a3, a3
+ li t9, 8388736 /* t9 = 0x800080 */
+1:
+ lw t0, 0(a0)
+ lw t1, 4(a0)
+ or t2, t0, t1 /* if both dest are zero, no computation needed */
+ beqz t2, 12f
+ addiu a1, -2 /* two pixels per pass; NOTE(review): appears to sit in the beqz delay slot and so runs on both paths - confirm LEAF_MIPS_DSP selects noreorder */
+
+ BYTE_MUL_x2 t0, t1, t6, t7, a3, a3, t9, t2, t3, t4, t5, 0 /* t6, t7 = dest pixels scaled by ialpha, presumably; macro is defined in qt_mips_asm_dsp.h */
+11: /* label is not referenced inside this routine */
+ addu t2, a2, t6
+ addu t3, a2, t7
+ sw t2, 0(a0)
+ sw t3, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8
+ b 2f /* NOTE(review): no nop follows; with delay slots active the addu at 12: executes here and only overwrites scratch t2 */
+12: /* both dest pixels are zero: the result is the colour alone */
+ addu t2, a2, t0
+ addu t3, a2, t1
+ sw t2, 0(a0)
+ sw t3, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8
+2:
+ jr ra
+ nop
+
+END(comp_func_solid_Source_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_DestinationOver_dsp_asm_x2)
+/*
+ * a0 - uint *dest (read and written, two pixels per pass)
+ * a1 - int length (pixel count, reduced by 2 per pass; 0 returns at once)
+ * a2 - uint color (0 returns at once, there is nothing to add)
+ */
+
+ addiu sp, sp, -8
+ sw s0, 0(sp) /* NOTE(review): s0 and s1 are saved and restored but not otherwise used in this routine */
+ sw s1, 4(sp)
+ beqz a1, 2f
+ nop
+ beqz a2, 2f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding factor handed to BYTE_MUL_x2) */
+
+1:
+ lw t0, 0(a0)
+ lw t1, 4(a0)
+ not t2, t0
+ not t3, t1
+ srl t4, t2, 24 /* t4 = qAlpha(~dest 1) */
+ srl t5, t3, 24 /* t5 = qAlpha(~dest 2) */
+ or t2, t4, t5 /* if both inverse dest alphas are zero, dest is stored back unchanged */
+ beqz t2, 11f
+ addiu a1, -2
+ replv.ph t2, t4
+ replv.ph t3, t5
+
+ BYTE_MUL_x2 a2, a2, t8, a3, t2, t3, t9, t4, t5, t6, t7 /* t8, a3 = colour scaled per pixel, presumably; a3 is free because this routine takes three arguments */
+
+ addu t0, t0, t8
+ addu t1, t1, a3
+11:
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8
+
+2:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ addiu sp, sp, 8
+ jr ra
+ nop
+
+END(comp_func_solid_DestinationOver_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_DestinationOver_dsp_asm_x2)
+/*
+ * a0 - uint *dest (read and written, two pixels per pass)
+ * a1 - const uint *src (only read)
+ * a2 - int length (pixel count, reduced by 2 per pass; 0 returns at once)
+ * a3 - uint const_alpha (255 selects the path that skips source scaling)
+ */
+
+ .set noat
+ addiu sp, sp, -8
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding factor handed to BYTE_MUL_x2) */
+ li t0, 0xff
+ beq a3, t0, 2f
+ nop
+
+/* part where const_alpha != 255 */
+1:
+ replv.ph a3, a3
+11:
+ lw t0, 0(a1) # src_1
+ lw t1, 4(a1) # src_2
+ addiu a2, -2
+
+ BYTE_MUL_x2 t0, t1, t8, AT, a3, a3, t9, t4, t5, t6, t7, 0
+ # t8 = s1
+ # AT = s2
+ lw t0, 0(a0) # dest_1
+ lw t1, 4(a0) # dest_2
+ addiu a1, 8
+ not t2, t0
+ not t3, t1
+ srl t4, t2, 24
+ srl t5, t3, 24
+ replv.ph t2, t4 # qAlpha(~d) 1
+ replv.ph t3, t5 # qAlpha(~d) 2
+
+ BYTE_MUL_x2 t8, AT, s0, s1, t2, t3, t9, t4, t5, t6, t7 /* s0, s1 = scaled source weighted by qAlpha(~d), presumably */
+
+ addu t0, t0, s0
+ addu t1, t1, s1
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t0, 0(a0) # dest 1
+ lw t1, 4(a0) # dest 2
+ lw s0, 0(a1) # src 1
+ lw s1, 4(a1) # src 2
+ not t2, t0
+ not t3, t1
+ srl t4, t2, 24
+ srl t5, t3, 24
+ replv.ph t2, t4
+ replv.ph t3, t5
+ addiu a1, 8
+ addiu a2, -2
+
+ BYTE_MUL_x2 s0, s1, t8, AT, t2, t3, t9, t4, t5, t6, t7 /* t8, AT = unscaled source weighted by qAlpha(~d), presumably */
+
+ addu t0, t0, t8
+ addu t1, t1, AT
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ addiu sp, sp, 8
+ jr ra
+ nop
+ .set at
+
+END(comp_func_DestinationOver_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_SourceIn_dsp_asm_x2)
+/*
+ * a0 - uint *dest (read and written, two pixels per pass)
+ * a1 - int length (pixel count, reduced by 2 per pass; 0 returns at once)
+ * a2 - uint color (unscaled; scaled here by const_alpha on the non-255 path)
+ * a3 - uint const_alpha (255 selects the plain BYTE_MUL path)
+ */
+
+ .set noat
+ addiu sp, -12 /* NOTE(review): frame of 12 bytes is not a multiple of 8 - check against the ABI stack alignment rule */
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ beqz a1, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ replv.ph t0, a3
+ li t5, 0xff
+ BYTE_MUL a2, a2, t0, t9, t1, t2, t3, t4 /* a2 = color ( = BYTE_MUL(color, const_alpha)); */
+ subu t1, t5, a3 /* t1 = cia = 255 - const_alpha */
+11:
+ lw t2, 0(a0) /* t2 = d */
+ lw s0, 4(a0) /* s0 = second d */
+ addiu a1, -2
+ srl t3, t2, 24 /* t3 = qAlpha(d) */
+ srl s2, s0, 24 /* s2 = qAlpha(second d) */
+
+ INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7
+ INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7
+
+ sw AT, 0(a0)
+ sw s1, 4(a0)
+ bnez a1, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t0, 0(a0) /* dest 1 */
+ lw t1, 4(a0) /* dest 2 */
+ srl t4, t0, 24
+ srl t5, t1, 24
+ replv.ph t2, t4
+ replv.ph t3, t5
+ addiu a1, -2
+
+ BYTE_MUL_x2 a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7 /* t8 is reused as an output here; the 0xff00ff00 mask is not needed on this path */
+
+ sw t8, 0(a0)
+ sw AT, 4(a0)
+ bnez a1, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ addiu sp, 12
+ jr ra
+ nop
+ .set at
+
+END(comp_func_solid_SourceIn_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_SourceIn_dsp_asm_x2)
+/*
+ * a0 - uint *dest (read and written, two pixels per pass)
+ * a1 - const uint *src (only read)
+ * a2 - int length (pixel count, reduced by 2 per pass; 0 returns at once)
+ * a3 - uint const_alpha (255 selects the plain BYTE_MUL path)
+ */
+
+ .set noat
+ addiu sp, -16
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ sw s3, 12(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ li t5, 0xff
+ subu t7, t5, a3 /* t7 = cia = 255 - const_alpha */
+ replv.ph a3, a3 /* done after the subu above, which needs the plain byte value */
+11:
+ lw t0, 0(a1) /* t0 = src 1 */
+ lw t1, 4(a1) /* t1 = src 2 */
+ addiu a2, -2
+
+ BYTE_MUL_x2 t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0 /* AT, s0 = src pixels scaled by const_alpha, presumably */
+
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addiu a1, 8
+
+ srl t2, t0, 24 /* t2 = qAlpha(d) 1 */
+ srl t3, t1, 24 /* t3 = qAlpha(d) 2 */
+
+ INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3
+ INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3
+
+ sw s1, 0(a0)
+ sw s2, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a0) /* dest 1 */
+ lw t3, 4(a0) /* dest 2 */
+ lw t0, 0(a1) /* src 1 */
+ lw t1, 4(a1) /* src 2 */
+ srl t4, t2, 24
+ srl t5, t3, 24
+ replv.ph t2, t4 /* t2 = replicated qAlpha(dest 1); the dest pixel itself is no longer needed */
+ replv.ph t3, t5 /* t3 = replicated qAlpha(dest 2) */
+ addiu a2, -2
+
+ BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7
+
+ addiu a1, 8
+ sw t8, 0(a0)
+ sw AT, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ lw s3, 12(sp)
+ addiu sp, 16
+ jr ra
+ nop
+ .set at
+
+END(comp_func_SourceIn_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_DestinationIn_dsp_asm_x2)
+/*
+ * a0 - uint *dest (read and written, two pixels per pass)
+ * a1 - int length (pixel count, reduced by 2 per pass; 0 returns at once)
+ * a2 - uint a (byte multiplier applied to every dest pixel)
+ */
+
+ .set noat
+ beqz a1, 2f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ replv.ph a2, a2 /* replicate the multiplier once, outside the loop */
+1:
+ lw t0, 0(a0)
+ lw t1, 4(a0)
+ addiu a1, -2
+
+ BYTE_MUL_x2 t0, t1, t8, AT, a2, a2, t9, t4, t5, t6, t7, 0 /* t8, AT = dest pixels scaled by a, presumably */
+
+ sw t8, 0(a0)
+ sw AT, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8
+2:
+ jr ra
+ nop
+ .set at
+
+END(comp_func_solid_DestinationIn_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_DestinationIn_dsp_asm_x2)
+/*
+ * a0 - uint *dest (read and written, two pixels per pass)
+ * a1 - const uint *src (only read; only its alpha byte is used)
+ * a2 - int length (pixel count, reduced by 2 per pass; 0 returns at once)
+ * a3 - uint const_alpha (255 selects the path without alpha adjustment)
+ */
+
+ addiu sp, -8
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ li t0, 0xff
+ beq a3, t0, 2f
+ nop
+
+/* part where const_alpha != 255 */
+1:
+ li t5, 0xff
+ subu t8, t5, a3 /* t8 = cia = 255 - const_alpha */
+ replv.ph a3, a3
+11:
+ lw t0, 0(a1) /* t0 = src 1 */
+ lw t1, 4(a1) /* t1 = src 2 */
+ addiu a2, -2
+ srl t0, t0, 24 /* t0 = qAlpha(src 1) */
+ srl t1, t1, 24 /* t1 = qAlpha(src 2) */
+
+ BYTE_MUL_x2 t0, t1, s1, t7, a3, a3, t9, t3, t4, t5, t6, 0 /* s1, t7 = source alphas scaled by const_alpha, presumably */
+
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addu s1, s1, t8 /* a 1 */
+ addu t7, t7, t8 /* a 2 */
+ replv.ph t2, s1
+ replv.ph t3, t7
+
+ BYTE_MUL_x2 t0, t1, s1, t7, t2, t3, t9, t4, t5, t6, s0
+
+ addiu a1, 8
+ sw s1, 0(a0)
+ sw t7, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a1) /* src 1 */
+ lw t3, 4(a1) /* src 2 */
+ lw t0, 0(a0) /* dest 1 */
+ lw t1, 4(a0) /* dest 2 */
+ srl t4, t2, 24
+ srl t5, t3, 24
+ replv.ph t2, t4 /* t2 = qAlpha(src 1) */
+ replv.ph t3, t5 /* t3 = qAlpha(src 2) */
+ addiu a2, -2
+
+ BYTE_MUL_x2 t0, t1, t8, s1, t2, t3, t9, t4, t5, t6, t7
+
+ addiu a1, 8
+ sw t8, 0(a0)
+ sw s1, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ addiu sp, 8
+ jr ra
+ nop
+
+END(comp_func_DestinationIn_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_DestinationOut_dsp_asm_x2)
+/*
+ * a0 - uint *dest (read and written, two pixels per pass)
+ * a1 - const uint *src (only read; only its inverted alpha byte is used)
+ * a2 - int length (pixel count, reduced by 2 per pass; 0 returns at once)
+ * a3 - uint const_alpha (255 selects the path without alpha adjustment)
+ */
+
+ .set noat
+ addiu sp, -4 /* NOTE(review): frame of 4 bytes is not a multiple of 8 - check against the ABI stack alignment rule */
+ sw s0, 0(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ li t0, 0xff
+ beq a3, t0, 2f
+ nop
+
+/* part where const_alpha != 255 */
+1:
+ li t5, 0xff
+ subu t8, t5, a3 /* t8 = cia = 255 - const_alpha */
+ replv.ph a3, a3
+11:
+ lw t0, 0(a1) /* t0 = src 1 */
+ lw t1, 4(a1) /* t1 = src 2 */
+ not t0, t0
+ not t1, t1
+ addiu a2, -2
+ srl t0, t0, 24 /* t0 = qAlpha(~src 1) */
+ srl t1, t1, 24 /* t1 = qAlpha(~src 2) */
+
+ BYTE_MUL_x2 t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0 /* AT, t7 = inverse source alphas scaled by const_alpha, presumably */
+
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addu AT, AT, t8 /* a 1 */
+ addu t7, t7, t8 /* a 2 */
+ replv.ph t2, AT
+ replv.ph t3, t7
+
+ BYTE_MUL_x2 t0, t1, AT, t7, t2, t3, t9, t4, t5, t6, s0
+
+ addiu a1, 8
+ sw AT, 0(a0)
+ sw t7, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a1) /* src 1 */
+ lw t3, 4(a1) /* src 2 */
+ not t2, t2
+ not t3, t3
+ lw t0, 0(a0) /* dest 1 */
+ lw t1, 4(a0) /* dest 2 */
+ srl t4, t2, 24
+ srl t5, t3, 24
+ replv.ph t2, t4 /* t2 = qAlpha(~src 1), the source was inverted above */
+ replv.ph t3, t5 /* t3 = qAlpha(~src 2) */
+ addiu a2, -2
+
+ BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7
+
+ addiu a1, 8
+ sw t8, 0(a0)
+ sw AT, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ addiu sp, 4
+ jr ra
+ nop
+ .set at
+
+END(comp_func_DestinationOut_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_SourceAtop_dsp_asm_x2)
+/*
+ * a0 - uint *dest (read and written, two pixels per pass)
+ * a1 - int length (pixel count, reduced by 2 per pass; 0 returns at once)
+ * a2 - uint color (first interpolation input, weighted by qAlpha(dest))
+ * a3 - uint sia (weight applied to the dest pixel)
+ */
+
+ .set noat
+ addu sp, -4 /* NOTE(review): addu with an immediate where sibling routines use addiu; 4-byte frame is not a multiple of 8 - check ABI alignment */
+ sw s0, 0(sp)
+ beqz a1, 2f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+1:
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addiu a1, -2
+ srl t2, t0, 24 /* t2 = qAlpha(dest 1) */
+ srl t3, t1, 24 /* t3 = qAlpha(dest 2) */
+
+ INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7
+ INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7
+
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8
+2:
+ lw s0, 0(sp)
+ addiu sp, 4
+ jr ra
+ nop
+ .set at
+
+END(comp_func_solid_SourceAtop_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_SourceAtop_dsp_asm_x2)
+/*
+ * a0 - uint *dest (read and written, two pixels per pass)
+ * a1 - const uint *src (only read)
+ * a2 - int length (pixel count, reduced by 2 per pass; 0 returns at once)
+ * a3 - uint const_alpha (255 selects the path that skips source scaling)
+ */
+
+ .set noat
+ addiu sp, -20 /* NOTE(review): frame of 20 bytes is not a multiple of 8 - check against the ABI stack alignment rule */
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ sw s3, 12(sp)
+ sw s4, 16(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ replv.ph a3, a3
+11:
+ lw AT, 0(a1) /* src 1 */
+ lw s0, 4(a1) /* src 2 */
+
+ BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
+ /* t0 = s (src 1 scaled by const_alpha), t1 likewise for src 2 */
+
+ lw t2, 0(a0) /* t2 = dest 1 */
+ lw t3, 4(a0) /* t3 = dest 2 */
+
+ srl t4, t2, 24 /* t4 = qAlpha(dest 1) */
+ srl t5, t3, 24
+ not t6, t0
+ not t7, t1
+ srl t6, t6, 24 /* t6 = qAlpha(~s) */
+ srl t7, t7, 24
+ addiu a2, -2
+
+ INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
+ INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4
+
+ addiu a1, 8
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a0) /* dest 1 */
+ lw t3, 4(a0) /* dest 2 */
+ lw t0, 0(a1) /* src 1 */
+ lw t1, 4(a1) /* src 2 */
+ srl t4, t2, 24 /* t4 = qAlpha(dest 1) */
+ srl t5, t3, 24
+ not t6, t0
+ not t7, t1
+ srl t6, t6, 24 /* t6 = qAlpha(~src 1) */
+ srl t7, t7, 24
+ addiu a2, -2
+
+ INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
+ INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4
+
+ addiu a1, 8
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ lw s3, 12(sp)
+ lw s4, 16(sp)
+ addiu sp, 20
+ jr ra
+ nop
+ .set at
+
+END(comp_func_SourceAtop_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_DestinationAtop_dsp_asm_x2)
+/*
+ * a0 - uint *dest (read and written, two pixels per pass)
+ * a1 - int length (pixel count, reduced by 2 per pass; 0 returns at once)
+ * a2 - uint color (second interpolation input, weighted by qAlpha(~dest))
+ * a3 - uint a (weight applied to the dest pixel)
+ */
+
+ .set noat
+ addiu sp, -4 /* NOTE(review): frame of 4 bytes is not a multiple of 8 - check against the ABI stack alignment rule */
+ sw s0, 0(sp)
+ beqz a1, 2f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+1:
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addiu a1, -2
+ not t2, t0
+ not t3, t1
+ srl t2, t2, 24 /* t2 = qAlpha(~(dest 1)) */
+ srl t3, t3, 24 /* t3 = qAlpha(~(dest 2)) */
+
+ INTERPOLATE_PIXEL_255 t0, a3, a2, t2, AT, t9, t8, t4, t5, t6, t7
+ INTERPOLATE_PIXEL_255 t1, a3, a2, t3, s0, t9, t8, t4, t5, t6, t7
+
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8
+2:
+ lw s0, 0(sp)
+ addiu sp, 4
+ jr ra
+ nop
+ .set at
+
+END(comp_func_solid_DestinationAtop_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_DestinationAtop_dsp_asm_x2)
+/*
+ * a0 - uint *dest (read and written, two pixels per pass)
+ * a1 - const uint *src (only read)
+ * a2 - int length (pixel count, reduced by 2 per pass; 0 returns at once)
+ * a3 - uint const_alpha (255 selects the path that skips source scaling)
+ */
+
+ .set noat
+ addiu sp, -24
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ sw s3, 12(sp)
+ sw s4, 16(sp)
+ sw s5, 20(sp)
+ beqz a2, 3f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ li s5, 0xff
+ subu s5, s5, a3 /* s5 = cia = 255 - const_alpha */
+ replv.ph a3, a3 /* done after the subu above, which needs the plain byte value */
+11:
+ lw AT, 0(a1) /* src 1 */
+ lw s0, 4(a1) /* src 2 */
+
+ BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
+ /* t0 = s (src 1 scaled by const_alpha), t1 likewise for src 2 */
+
+ lw t2, 0(a0) /* t2 = dest 1 */
+ lw t3, 4(a0) /* t3 = dest 2 */
+
+ not t4, t2
+ not t5, t3
+ srl t4, t4, 24 /* t4 = qAlpha(~(dest 1)) */
+ srl t5, t5, 24
+ srl t6, t0, 24
+ srl t7, t1, 24
+ addu t6, t6, s5 /* t6 = a = qAlpha(s1) + cia */
+ addu t7, t7, s5
+ addiu a2, -2
+
+ INTERPOLATE_PIXEL_255 t2, t6, t0, t4, AT, t9, t8, s1, s2, s3, s4
+ INTERPOLATE_PIXEL_255 t3, t7, t1, t5, s0, t9, t8, s1, s2, s3, s4
+
+ addiu a1, 8
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a0) /* d1 */
+ lw t3, 4(a0) /* d2 */
+ lw t0, 0(a1) /* s1 */
+ lw t1, 4(a1) /* s2 */
+ srl t4, t0, 24 /* t4 = qAlpha(s1) */
+ srl t5, t1, 24
+ not t6, t2
+ not t7, t3
+ srl t6, t6, 24 /* qAlpha(~d1) */
+ srl t7, t7, 24
+ addiu a2, -2
+
+ INTERPOLATE_PIXEL_255 t2, t4, t0, t6, AT, t9, t8, s1, s2, s3, s4
+ INTERPOLATE_PIXEL_255 t3, t5, t1, t7, s0, t9, t8, s1, s2, s3, s4
+
+ addiu a1, 8
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ lw s3, 12(sp)
+ lw s4, 16(sp)
+ lw s5, 20(sp)
+ addiu sp, 24
+ jr ra
+ nop
+ .set at
+
+END(comp_func_DestinationAtop_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_XOR_dsp_asm_x2)
+/*
+ * a0 - uint *dest (read and written, two pixels per pass)
+ * a1 - int length (pixel count, reduced by 2 per pass; 0 returns at once)
+ * a2 - uint color (first interpolation input, weighted by qAlpha(~dest))
+ * a3 - uint sia (weight applied to the dest pixel)
+ */
+
+ .set noat
+ addu sp, -4 /* NOTE(review): addu with an immediate where sibling routines use addiu; 4-byte frame is not a multiple of 8 - check ABI alignment */
+ sw s0, 0(sp)
+ beqz a1, 2f
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+1:
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addiu a1, -2
+ not t2, t0
+ not t3, t1
+ srl t2, t2, 24 /* t2 = qAlpha(~(dest 1)) */
+ srl t3, t3, 24 /* t3 = qAlpha(~(dest 2)) */
+
+ INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7
+ INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7
+
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a1, 1b
+ addiu a0, 8
+2:
+ lw s0, 0(sp)
+ addu sp, 4
+ jr ra
+ nop
+ .set at
+
+END(comp_func_solid_XOR_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_XOR_dsp_asm_x2)
+/*
+ * XOR composition of a source span onto dest, two pixels per iteration:
+ *   s       = BYTE_MUL(src[i], const_alpha)   (src[i] itself when const_alpha == 255)
+ *   dest[i] = INTERPOLATE_PIXEL_255(s, qAlpha(~dest[i]), dest[i], qAlpha(~s))
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * length is decremented by 2 per iteration and the loops exit only when it
+ * reaches exactly 0, so a non-zero length has to be even.
+ * s0-s4 are saved on entry and restored on exit.
+ * NOTE(review): the instruction following each branch is written as a
+ * delay slot; this assumes LEAF_MIPS_DSP selects .set noreorder - confirm.
+ */
+
+ .set noat
+ addiu sp, -20 /* room for s0-s4 */
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ sw s3, 12(sp)
+ sw s4, 16(sp)
+ beqz a2, 3f /* nothing to do for an empty span */
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f /* const_alpha == 255: use the loop without BYTE_MUL */
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ replv.ph a3, a3 /* a3 = | 00 | const_alpha | 00 | const_alpha | */
+11:
+ lw AT, 0(a1) /* src 1 */
+ lw s0, 4(a1) /* src 2 */
+ /* in_const = 0: AT and s0 are clobbered, a3 is preserved */
+ BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
+ /* t0 = s1 */
+ /* t1 = s2 */
+
+ lw t2, 0(a0) /* t2 = dest 1 */
+ lw t3, 4(a0) /* t3 = dest 2 */
+
+ not t4, t2
+ not t5, t3
+ srl t4, t4, 24 /* t4 = qAlpha(~(dest 1)) */
+ srl t5, t5, 24 /* t5 = qAlpha(~(dest 2)) */
+ not t6, t0
+ not t7, t1
+ srl t6, t6, 24 /* t6 = qAlpha(~s) */
+ srl t7, t7, 24 /* t7 = qAlpha(~s2) */
+ addiu a2, -2 /* two pixels consumed */
+ /* AT / s0 = s * qAlpha(~d) + d * qAlpha(~s); s1-s4 are scratch */
+ INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
+ INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4
+
+ addiu a1, 8 /* advance src by two pixels */
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8 /* advance dest by two pixels */
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a0) /* d1 */
+ lw t3, 4(a0) /* d2 */
+ lw t0, 0(a1) /* s1 */
+ lw t1, 4(a1) /* s2 */
+ not t4, t0
+ not t5, t1
+ srl t4, t4, 24 /* t4 = qAlpha(~s1) */
+ srl t5, t5, 24 /* t5 = qAlpha(~s2) */
+ not t6, t2
+ not t7, t3
+ srl t6, t6, 24 /* qAlpha(~d1) */
+ srl t7, t7, 24 /* qAlpha(~d2) */
+ addiu a2, -2 /* two pixels consumed */
+ /* AT / s0 = s * qAlpha(~d) + d * qAlpha(~s); s1-s4 are scratch */
+ INTERPOLATE_PIXEL_255 t0, t6, t2, t4, AT, t9, t8, s1, s2, s3, s4
+ INTERPOLATE_PIXEL_255 t1, t7, t3, t5, s0, t9, t8, s1, s2, s3, s4
+
+ addiu a1, 8 /* advance src by two pixels */
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8 /* advance dest by two pixels */
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ lw s3, 12(sp)
+ lw s4, 16(sp)
+ addiu sp, 20
+ jr ra
+ nop
+ .set at
+
+END(comp_func_XOR_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_solid_SourceOut_dsp_asm_x2)
+/*
+ * SourceOut composition of a solid color onto dest, two pixels per
+ * iteration:
+ *   const_alpha == 255:
+ *     dest[i] = BYTE_MUL(color, qAlpha(~dest[i]))
+ *   otherwise:
+ *     color   = BYTE_MUL(color, const_alpha)
+ *     dest[i] = INTERPOLATE_PIXEL_255(color, qAlpha(~dest[i]),
+ *                                     dest[i], 255 - const_alpha)
+ *
+ * a0 - uint *dest
+ * a1 - int length
+ * a2 - uint color
+ * a3 - uint const_alpha
+ *
+ * length is decremented by 2 per iteration and the loops exit only when it
+ * reaches exactly 0, so a non-zero length has to be even.
+ * s0-s2 are saved on entry and restored on exit.
+ * NOTE(review): the instruction following each branch is written as a
+ * delay slot; this assumes LEAF_MIPS_DSP selects .set noreorder - confirm.
+ */
+
+ .set noat
+ addiu sp, -12 /* room for s0-s2 */
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ beqz a1, 3f /* nothing to do for an empty span */
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f /* const_alpha == 255: use the BYTE_MUL-only loop */
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ replv.ph t0, a3 /* t0 = | 00 | const_alpha | 00 | const_alpha | */
+ li t5, 0xff
+ BYTE_MUL a2, a2, t0, t9, t1, t2, t3, t4 /* a2 = color = BYTE_MUL(color, const_alpha); t1-t4 are scratch */
+ subu t1, t5, a3 /* t1 = cia = 255 - const_alpha */
+11:
+ lw t2, 0(a0) /* t2 = d1 */
+ lw s0, 4(a0) /* s0 = d2 */
+ addiu a1, -2 /* two pixels consumed */
+ not t3, t2
+ not s2, s0
+ srl t3, t3, 24 /* t3 = qAlpha(~d1) */
+ srl s2, s2, 24 /* s2 = qAlpha(~d2) */
+ /* AT / s1 = color * qAlpha(~d) + d * cia; t4-t7 are scratch */
+ INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7
+ INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7
+
+ sw AT, 0(a0)
+ sw s1, 4(a0)
+ bnez a1, 11b
+ addiu a0, 8 /* advance dest by two pixels */
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t0, 0(a0) /* dest 1 */
+ lw t1, 4(a0) /* dest 2 */
+ not t4, t0
+ not t5, t1
+ srl t4, t4, 24 /* t4 = qAlpha(~(dest 1)) */
+ srl t5, t5, 24 /* t5 = qAlpha(~(dest 2)) */
+ replv.ph t2, t4 /* replicate the multipliers for BYTE_MUL_x2 */
+ replv.ph t3, t5
+ addiu a1, -2 /* two pixels consumed */
+ /* in_const defaults to 1: a2 is preserved, t2/t3 are clobbered; t8 is reused as an output here */
+ BYTE_MUL_x2 a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7
+
+ sw t8, 0(a0)
+ sw AT, 4(a0)
+ bnez a1, 2b
+ addiu a0, 8 /* advance dest by two pixels */
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ addiu sp, 12
+ jr ra
+ nop
+ .set at
+
+END(comp_func_solid_SourceOut_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_SourceOut_dsp_asm_x2)
+/*
+ * SourceOut composition of a source span onto dest, two pixels per
+ * iteration:
+ *   const_alpha == 255:
+ *     dest[i] = BYTE_MUL(src[i], qAlpha(~dest[i]))
+ *   otherwise:
+ *     s       = BYTE_MUL(src[i], const_alpha)
+ *     dest[i] = INTERPOLATE_PIXEL_255(s, qAlpha(~dest[i]),
+ *                                     dest[i], 255 - const_alpha)
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * length is decremented by 2 per iteration and the loops exit only when it
+ * reaches exactly 0, so a non-zero length has to be even.
+ * s0-s3 are saved on entry and restored on exit.
+ * NOTE(review): the instruction following each branch is written as a
+ * delay slot; this assumes LEAF_MIPS_DSP selects .set noreorder - confirm.
+ */
+
+ .set noat
+ addiu sp, -16 /* room for s0-s3 */
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ sw s3, 12(sp)
+ beqz a2, 3f /* nothing to do for an empty span */
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ li t0, 0xff
+ beq a3, t0, 2f /* const_alpha == 255: use the BYTE_MUL-only loop */
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+
+/* part where const_alpha != 255 */
+1:
+ li t5, 0xff
+ subu t7, t5, a3 /* t7 = cia = 255 - const_alpha */
+ replv.ph a3, a3 /* a3 = | 00 | const_alpha | 00 | const_alpha | */
+11:
+ lw t0, 0(a1) /* t0 = src 1 */
+ lw t1, 4(a1) /* t1 = src 2 */
+ addiu a2, -2 /* two pixels consumed */
+ /* AT / s0 = BYTE_MUL(src, const_alpha); in_const = 0: t0/t1 clobbered, a3 preserved */
+ BYTE_MUL_x2 t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0
+
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ addiu a1, 8 /* advance src by two pixels */
+
+ not t2, t0
+ not t3, t1
+ srl t2, t2, 24 /* t2 = qAlpha(~d1) */
+ srl t3, t3, 24 /* t3 = qAlpha(~d2) */
+ /* s1 / s2 = s * qAlpha(~d) + d * cia; t4-t6 and s3 are scratch */
+ INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3
+ INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3
+
+ sw s1, 0(a0)
+ sw s2, 4(a0)
+ bnez a2, 11b
+ addiu a0, 8 /* advance dest by two pixels */
+ b 3f
+ nop
+
+/* part where const_alpha = 255 */
+2:
+ lw t2, 0(a0) /* dest 1 */
+ lw t3, 4(a0) /* dest 2 */
+ lw t0, 0(a1) /* src 1 */
+ lw t1, 4(a1) /* src 2 */
+ not t4, t2
+ not t5, t3
+ srl t4, t4, 24 /* qAlpha(~d1) */
+ srl t5, t5, 24 /* qAlpha(~d2) */
+ replv.ph t2, t4 /* replicate the multipliers for BYTE_MUL_x2 (dest values are no longer needed) */
+ replv.ph t3, t5
+ addiu a2, -2 /* two pixels consumed */
+ /* t8 / AT = BYTE_MUL(src, qAlpha(~d)); t8 is reused as an output here */
+ BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7
+
+ addiu a1, 8 /* advance src by two pixels */
+ sw t8, 0(a0)
+ sw AT, 4(a0)
+ bnez a2, 2b
+ addiu a0, 8 /* advance dest by two pixels */
+
+3:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ lw s3, 12(sp)
+ addiu sp, 16
+ jr ra
+ nop
+ .set at
+
+END(comp_func_SourceOut_dsp_asm_x2)
+
+LEAF_MIPS_DSP(comp_func_Source_dsp_asm_x2)
+/*
+ * Source composition with a constant alpha, two pixels per iteration:
+ *   dest[i] = INTERPOLATE_PIXEL_255(src[i], const_alpha,
+ *                                   dest[i], 255 - const_alpha)
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * length is decremented by 2 per iteration and the loop exits only when it
+ * reaches exactly 0, so a non-zero length has to be even.
+ * s0 and s1 are saved on entry and restored on exit.
+ * NOTE(review): the instruction following each branch is written as a
+ * delay slot; this assumes LEAF_MIPS_DSP selects .set noreorder - confirm.
+ */
+
+ .set noat
+ addiu sp, -8 /* room for s0 and s1 */
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ beqz a2, 2f /* nothing to do for an empty span */
+ nop
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+ lui t8, 0xff00
+ ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */
+ li t7, 0xff
+ subu t7, t7, a3 /* t7 = ialpha = 255 - const_alpha */
+1:
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ lw t2, 0(a1) /* t2 = src 1 */
+ lw t3, 4(a1) /* t3 = src 2 */
+ addiu a2, -2 /* two pixels consumed */
+ addiu a1, 8 /* advance src by two pixels */
+ /* AT / s0 = src * const_alpha + dest * ialpha; t4-t6 and s1 are scratch */
+ INTERPOLATE_PIXEL_255 t2, a3, t0, t7, AT, t9, t8, t4, t5, t6, s1
+ INTERPOLATE_PIXEL_255 t3, a3, t1, t7, s0, t9, t8, t4, t5, t6, s1
+
+ sw AT, 0(a0)
+ sw s0, 4(a0)
+ bnez a2, 1b
+ addiu a0, 8 /* advance dest by two pixels */
+2:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ addiu sp, 8
+ jr ra
+ nop
+ .set at
+
+END(comp_func_Source_dsp_asm_x2)
+
+LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_mips_dsp_asm_x2)
+/*
+ * Blends a source span over dest with a constant alpha, two pixels per
+ * iteration:
+ *   s       = BYTE_MUL(src[i], const_alpha)
+ *   dest[i] = s + BYTE_MUL(dest[i], qAlpha(~s))
+ * The final sum is a plain 32-bit addu of the two packed pixels.
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ * a3 - uint const_alpha
+ *
+ * length is decremented by 2 per iteration and the loop exits only when it
+ * reaches exactly 0, so a non-zero length has to be even.
+ * s0-s2 are saved on entry and restored on exit.
+ * NOTE(review): the instruction following each branch is written as a
+ * delay slot; this assumes LEAF_MIPS_DSP selects .set noreorder - confirm.
+ */
+
+ .set noat
+ addiu sp, -12 /* room for s0-s2 */
+ sw s0, 0(sp)
+ sw s1, 4(sp)
+ sw s2, 8(sp)
+ beqz a2, 2f /* nothing to do for an empty span */
+ nop
+ replv.ph a3, a3 /* a3 = | 00 | const_alpha | 00 | const_alpha | */
+ li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */
+
+1:
+ lw t0, 0(a1) /* t0 = src 1 */
+ lw t1, 4(a1) /* t1 = src 2 */
+ addiu a2, -2 /* two pixels consumed */
+ /* AT / t7 = s = BYTE_MUL(src, const_alpha); in_const = 0: t0/t1 clobbered, a3 preserved */
+ BYTE_MUL_x2 t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0
+
+ lw t0, 0(a0) /* t0 = dest 1 */
+ lw t1, 4(a0) /* t1 = dest 2 */
+ not s1, AT
+ not s2, t7
+ srl s1, s1, 24 /* s1 = qAlpha(~s1) */
+ srl s2, s2, 24 /* s2 = qAlpha(~s2) */
+ replv.ph s1, s1 /* replicate the multipliers for BYTE_MUL_x2 */
+ replv.ph s2, s2
+ /* t2 / t3 = BYTE_MUL(dest, qAlpha(~s)); in_const defaults to 1: s1/s2 are clobbered */
+ BYTE_MUL_x2 t0, t1, t2, t3, s1, s2, t9, t4, t5, t6, s0
+
+ addiu a1, 8 /* advance src by two pixels */
+ addu AT, AT, t2 /* AT = s1 + BYTE_MUL(d1, qAlpha(~s1)) */
+ addu t7, t7, t3 /* t7 = s2 + BYTE_MUL(d2, qAlpha(~s2)) */
+ sw AT, 0(a0)
+ sw t7, 4(a0)
+ bnez a2, 1b
+ addiu a0, 8 /* advance dest by two pixels */
+
+2:
+ lw s0, 0(sp)
+ lw s1, 4(sp)
+ lw s2, 8(sp)
+ addiu sp, 12
+ jr ra
+ nop
+ .set at
+
+END(qt_blend_argb32_on_argb32_mips_dsp_asm_x2)
+
+LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)
+/*
+ * Blends a source span over dest without a constant alpha, one pixel per
+ * iteration, with shortcuts for trivial pixels:
+ *   qAlpha(~s) == 0 : dest[i] = s                  (source alpha is 0xff)
+ *   s == 0          : dest[i] is left untouched
+ *   dest[i] == 0    : dest[i] = s                  (BYTE_MUL skipped)
+ *   otherwise       : dest[i] = s + BYTE_MUL(dest[i], qAlpha(~s))
+ *
+ * a0 - uint *dest
+ * a1 - const uint *src
+ * a2 - int length
+ *
+ * Only t registers are used, so nothing is saved on the stack.
+ * NOTE(review): the instruction following each branch is written as a
+ * delay slot; this assumes LEAF_MIPS_DSP selects .set noreorder - confirm.
+ */
+
+ beqz a2, 5f /* nothing to do for an empty span */
+ nop
+ li t7, 8388736 /* t7 = 0x800080 */
+ b 2f
+ nop
+1: /* s == 0: skip this pixel */
+ addiu a0, a0, 4
+ addiu a2, a2, -1
+ beqz a2, 5f
+ nop
+2:
+ lw t0, 0(a1) /* t0 = s = src[i] */
+ addiu a1, a1, 4
+ nor t1, t0, zero /* t1 = ~s */
+ srl t1, t1, 24 /* t1 = qAlpha(~s) */
+ bnez t1, 3f /* source alpha below 0xff: blending needed */
+ nop
+ sw t0, 0(a0) /* dst[i] = src[i] */
+ addiu a2, a2, -1
+ bnez a2, 2b
+ addiu a0, a0, 4
+ b 5f
+ nop
+3:
+ beqz t0, 1b /* s == 0 contributes nothing */
+ replv.ph t6, t1 /* | 0 | qAlpha(~s) | 0 | qAlpha(~s) | */
+
+ lw t4, 0(a0) /* t4 = dst[i] */
+ addiu a2, a2, -1
+ beqz t4, 31f /* dst[i] == 0: the product is 0, skip BYTE_MUL */
+ move t8, zero /* t8 = 0 (value used when BYTE_MUL is skipped) */
+ /* t8 = BYTE_MUL(dst[i], qAlpha(~s)); t4 is both input and scratch */
+ BYTE_MUL t4, t8, t6, t7, t1, t2, t3, t4
+31:
+ addu t8, t0, t8 /* dst[i] =
+ * s + BYTE_MUL(dst[i], qAlpha(~s)) */
+ sw t8, 0(a0)
+ bnez a2, 2b
+ addiu a0, a0, 4
+ b 5f
+ nop
+5:
+ jr ra
+ nop
+
+END(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)
diff --git a/src/gui/painting/qdrawhelper_mips_dsp_p.h b/src/gui/painting/qdrawhelper_mips_dsp_p.h
index ed848485fc..818b27ce21 100644
--- a/src/gui/painting/qdrawhelper_mips_dsp_p.h
+++ b/src/gui/painting/qdrawhelper_mips_dsp_p.h
@@ -52,6 +52,48 @@ extern "C" void qt_memfill32_asm_mips_dsp(quint32 *dest, quint32 value, int coun
extern "C" void comp_func_SourceOver_asm_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
+// Prototypes of the MIPS DSP assembly kernels. The *_x2 kernels visible in
+// the assembly source consume two pixels per loop iteration and only stop
+// when the remaining length is exactly zero, so they expect an even length.
+extern "C" void comp_func_solid_DestinationOver_dsp_asm_x2(uint *dest, int length, uint color);
+
+extern "C" void comp_func_solid_Source_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha);
+
+extern "C" void comp_func_DestinationOver_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
+
+extern "C" void comp_func_solid_SourceIn_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha);
+
+extern "C" void comp_func_SourceIn_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
+
+extern "C" void comp_func_solid_DestinationIn_dsp_asm_x2(uint *dest, int length, uint a);
+
+extern "C" void comp_func_DestinationIn_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
+
+extern "C" void comp_func_DestinationOut_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
+
+extern "C" void comp_func_solid_SourceAtop_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha);
+
+extern "C" void comp_func_SourceAtop_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
+
+extern "C" void comp_func_solid_DestinationAtop_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha);
+
+extern "C" void comp_func_DestinationAtop_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
+
+// The fourth argument is the multiplier applied to dest (named "sia" in the
+// assembly implementation), not a constant alpha.
+extern "C" void comp_func_solid_XOR_dsp_asm_x2(uint *dest, int length, uint color, uint sia);
+
+extern "C" void comp_func_XOR_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
+
+extern "C" void comp_func_solid_SourceOut_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha);
+
+extern "C" void comp_func_SourceOut_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
+
+extern "C" void comp_func_Source_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
+
+extern "C" void qt_blend_argb32_on_argb32_mips_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
+
+// One pixel per iteration; takes no const_alpha argument.
+extern "C" void qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm(uint *dest, const uint *src, int length);
+
+extern "C" uint * destfetchARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);
+
+extern "C" uint * qt_destStoreARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);
+
void qt_blend_argb32_on_argb32_mips_dsp(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
@@ -71,6 +113,46 @@ uint * QT_FASTCALL qt_destFetchARGB32_mips_dsp(uint *buffer,
void QT_FASTCALL qt_destStoreARGB32_mips_dsp(QRasterBuffer *rasterBuffer, int x, int y,
const uint *buffer, int length);
+// Composition functions for MIPS DSP; each name is declared exactly once.
+void QT_FASTCALL comp_func_solid_Source_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
+
+void QT_FASTCALL comp_func_solid_SourceOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
+
+void QT_FASTCALL comp_func_solid_DestinationOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
+
+void QT_FASTCALL comp_func_DestinationOver_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
+
+void QT_FASTCALL comp_func_solid_SourceIn_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
+
+void QT_FASTCALL comp_func_SourceIn_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
+
+void QT_FASTCALL comp_func_solid_DestinationIn_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
+
+void QT_FASTCALL comp_func_DestinationIn_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
+
+void QT_FASTCALL comp_func_solid_DestinationOut_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
+
+void QT_FASTCALL comp_func_DestinationOut_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
+
+void QT_FASTCALL comp_func_solid_SourceAtop_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
+
+void QT_FASTCALL comp_func_SourceAtop_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
+
+void QT_FASTCALL comp_func_solid_DestinationAtop_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
+
+void QT_FASTCALL comp_func_DestinationAtop_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
+
+void QT_FASTCALL comp_func_solid_XOR_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
+
+void QT_FASTCALL comp_func_solid_SourceOut_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
+
+void QT_FASTCALL comp_func_SourceOut_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
+
+void QT_FASTCALL comp_func_XOR_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
+
#endif // QT_COMPILER_SUPPORTS_MIPS_DSP
diff --git a/src/gui/painting/qdrawhelper_mips_dspr2_asm.S b/src/gui/painting/qdrawhelper_mips_dspr2_asm.S
index 213fcf8a08..7e95410a7f 100644
--- a/src/gui/painting/qdrawhelper_mips_dspr2_asm.S
+++ b/src/gui/painting/qdrawhelper_mips_dspr2_asm.S
@@ -41,59 +41,6 @@
#include "qt_mips_asm_dsp.h"
-LEAF_MIPS_DSPR2(INTERPOLATE_PIXEL_255_asm_mips_dspr2)
-/*
- * a0 - uint x (First value to multiply)
- * a1 - uint a (Multiplicator byte for first value)
- * a2 - uint y (Second value to multiply)
- * a3 - uint b (Multiplicator byte for second value)
- */
-
- .set reorder
- replv.ph a1, a1
- replv.ph a3, a3
- li t8, 8388736
- muleu_s.ph.qbl t0, a0, a1
- muleu_s.ph.qbl t1, a2, a3
- muleu_s.ph.qbr t2, a0, a1
- muleu_s.ph.qbr t3, a2, a3
- addu.ph t4, t0, t1
- addu.ph t5, t2, t3
- preceu.ph.qbla t0, t4
- addu t1, t0, t8
- addu t1, t4, t1
- preceu.ph.qbla t6, t5
- addu t7, t6, t8
- addu t7, t5, t7
- precrq.qb.ph t2, t1, t7
- move v0, t2
- j ra
-
-END(INTERPOLATE_PIXEL_255_asm_mips_dspr2)
-
-LEAF_MIPS_DSPR2(BYTE_MUL_asm_mips_dspr2)
-/*
- * a0 - uint x (Value to multiply)
- * a1 - uint a (Multiplicator byte)
- */
-
- .set reorder
- replv.ph a1, a1 /* a1 = 0x00a00a */
- li t4, 8388736 /* t4 = 0x800080 */
- muleu_s.ph.qbl t0, a0, a1
- muleu_s.ph.qbr t2, a0, a1
- preceu.ph.qbla t1, t0
- addu t0, t0, t1
- addu t0, t0, t4
- preceu.ph.qbla t3, t2
- addu t2, t2, t3
- addu t2, t2, t4
- precrq.qb.ph t4, t0, t2
- move v0, t4
- j ra
-
-END(BYTE_MUL_asm_mips_dspr2)
-
LEAF_MIPS_DSPR2(qConvertRgb16To32_asm_mips_dspr2)
/*
* a0 - dst (a8r8g8b8)
diff --git a/src/gui/painting/qt_mips_asm_dsp.h b/src/gui/painting/qt_mips_asm_dsp.h
index bcde7068a2..088831f6e4 100644
--- a/src/gui/painting/qt_mips_asm_dsp.h
+++ b/src/gui/painting/qt_mips_asm_dsp.h
@@ -110,4 +110,140 @@ LEAF_MIPS32R2(symbol) \
.end function; \
.size function,.-function
+/*
+ * BYTE_MUL operation on two pixels (in_1 and in_2) with two
+ * multiplier bytes, repl_a1 and repl_a2, which should be
+ * prepared with:
+ * replv.ph repl_a1, a1
+ * replv.ph repl_a2, a2
+ * so that they become:
+ * repl_a1 = | 00 | a1 | 00 | a1 |
+ * repl_a2 = | 00 | a2 | 00 | a2 |
+ *
+ * For every byte channel c and its multiplier a the macro computes
+ *   t = c * a;  result = (t + (t >> 8) + 0x80) >> 8
+ * and packs the four results of in_1 into out_1 and those of in_2
+ * into out_2.
+ *
+ * rounding_factor must have the following value:
+ * li rounding_factor, 0x00800080
+ *
+ * scratch(n) - temporary registers (all four are clobbered)
+ *
+ * in_const: 1 -> (default) in_1 and in_2 remain unchanged
+ * after usage; repl_a1 and repl_a2 are overwritten
+ * with intermediate values, so they must be two
+ * different registers
+ * 0 -> (or anything different than 1) repl_a1 and
+ * repl_a2 remain unchanged after usage; in_1 and
+ * in_2 are overwritten with intermediate values,
+ * so they must be two different registers
+ *
+ * out_1 and out_2 are written before the scratch registers have
+ * been fully consumed, so they must not alias scratch1-scratch4.
+ */
+.macro BYTE_MUL_x2 in_1, in_2, out_1, out_2 \
+ repl_a1, repl_a2, rounding_factor, \
+ scratch1, scratch2, scratch3, scratch4, \
+ in_const = 1
+ muleu_s.ph.qbl \scratch1, \in_1, \repl_a1 /* products of the two upper bytes of in_1 */
+ muleu_s.ph.qbr \scratch2, \in_1, \repl_a1 /* products of the two lower bytes of in_1 */
+ muleu_s.ph.qbl \scratch3, \in_2, \repl_a2 /* products of the two upper bytes of in_2 */
+ muleu_s.ph.qbr \scratch4, \in_2, \repl_a2 /* products of the two lower bytes of in_2 */
+
+.if \in_const == 1
+ preceu.ph.qbla \repl_a1, \scratch1 /* t >> 8 of each product, kept in the repl registers */
+ preceu.ph.qbla \repl_a2, \scratch2
+ preceu.ph.qbla \out_1, \scratch3
+ preceu.ph.qbla \out_2, \scratch4
+
+ addu \scratch1, \repl_a1, \scratch1 /* t + (t >> 8) for in_1 */
+ addu \scratch2, \repl_a2, \scratch2
+.else
+ preceu.ph.qbla \in_1, \scratch1 /* t >> 8 of each product, kept in the input registers */
+ preceu.ph.qbla \in_2, \scratch2
+ preceu.ph.qbla \out_1, \scratch3
+ preceu.ph.qbla \out_2, \scratch4
+
+ addu \scratch1, \in_1, \scratch1 /* t + (t >> 8) for in_1 */
+ addu \scratch2, \in_2, \scratch2
+.endif
+
+ addu \out_1, \out_1, \scratch3 /* t + (t >> 8) for in_2 */
+ addu \out_2, \out_2, \scratch4
+
+ addu \scratch1, \scratch1, \rounding_factor /* + 0x80 in every halfword */
+ addu \scratch2, \scratch2, \rounding_factor
+ addu \scratch3, \out_1, \rounding_factor
+ addu \scratch4, \out_2, \rounding_factor
+
+ precrq.qb.ph \out_1, \scratch1, \scratch2 /* keep the high byte of each halfword: result for in_1 */
+ precrq.qb.ph \out_2, \scratch3, \scratch4 /* result for in_2 */
+
+.endm
+
+/*
+ * BYTE_MUL operation on one pixel (in_1) with a
+ * multiplier byte, repl_a1, which should be
+ * prepared with:
+ * replv.ph repl_a1, a1
+ * so that it becomes:
+ * repl_a1 = | 00 | a1 | 00 | a1 |
+ *
+ * For every byte channel c of in_1 the macro computes
+ *   t = c * a1;  result = (t + (t >> 8) + 0x80) >> 8
+ * and packs the four results into out_1. in_1 and repl_a1 are
+ * only read; in_1 has been fully consumed before the scratch
+ * registers are overwritten a second time, and out_1 is written
+ * by the last instruction only.
+ *
+ * rounding_factor must have the following value:
+ * li rounding_factor, 0x00800080
+ *
+ * scratch(n) - temporary registers (all four are clobbered)
+ */
+.macro BYTE_MUL in_1, out_1, \
+ repl_a1, rounding_factor, \
+ scratch1, scratch2, scratch3, scratch4
+ muleu_s.ph.qbl \scratch1, \in_1, \repl_a1 /* products of the two upper bytes */
+ muleu_s.ph.qbr \scratch2, \in_1, \repl_a1 /* products of the two lower bytes */
+
+ preceu.ph.qbla \scratch3, \scratch1 /* t >> 8 of each product */
+ preceu.ph.qbla \scratch4, \scratch2
+
+ addu \scratch1, \scratch1, \scratch3 /* t + (t >> 8) */
+ addu \scratch1, \scratch1, \rounding_factor /* + 0x80 in every halfword */
+
+ addu \scratch2, \scratch2, \scratch4
+ addu \scratch2, \scratch2, \rounding_factor
+
+ precrq.qb.ph \out_1, \scratch1, \scratch2 /* keep the high byte of each halfword */
+
+.endm
+
+/*
+ * macro for INTERPOLATE_PIXEL_255 operation
+ * in_1 - First value to multiply
+ * mul_1 - Multiplier byte for first value
+ * in_2 - Second value to multiply
+ * mul_2 - Multiplier byte for second value
+ * out_1 - Receives the packed result
+ *
+ * For every byte channel the macro computes
+ *   t = in_1.c * mul_1 + in_2.c * mul_2
+ *   out_1.c = (t + (t >> 8) + 0x80) >> 8
+ * Bytes 2 and 0 of the pixels are handled in the "x part",
+ * bytes 3 and 1 in the "x>>8 part".
+ *
+ * mul_1 and mul_2 are used with a plain mul on the unpacked
+ * halfword pair, so they are passed as ordinary byte values
+ * (not replicated with replv.ph). All inputs are only read.
+ *
+ * rounding_factor and andi_factor should be prepared
+ * as:
+ * li rounding_factor, 0x00800080
+ * li andi_factor, 0xff00ff00
+ * scratch(n) - temporary registers (all four are clobbered)
+ */
+.macro INTERPOLATE_PIXEL_255 in_1, mul_1, \
+ in_2, mul_2, \
+ out_1, \
+ rounding_factor, andi_factor \
+ scratch1, scratch2, scratch3, scratch4
+# x part
+ preceu.ph.qbra \scratch1, \in_1 /* bytes 2 and 0 of in_1 as halfwords */
+ preceu.ph.qbra \scratch2, \in_2 /* bytes 2 and 0 of in_2 as halfwords */
+ mul \scratch1, \scratch1, \mul_1
+ mul \scratch2, \scratch2, \mul_2
+# x>>8 part
+ preceu.ph.qbla \scratch3, \in_1 /* bytes 3 and 1 of in_1 as halfwords */
+ preceu.ph.qbla \scratch4, \in_2 /* bytes 3 and 1 of in_2 as halfwords */
+ mul \scratch3, \scratch3, \mul_1
+ mul \scratch4, \scratch4, \mul_2
+# x part
+ addu \scratch1, \scratch1, \scratch2 /* t = sum of both products */
+ preceu.ph.qbla \scratch2, \scratch1 /* t >> 8 */
+ addu \scratch1, \scratch1, \scratch2
+ addu \scratch1, \scratch1, \rounding_factor
+ preceu.ph.qbla \scratch1, \scratch1 /* high byte of each halfword, placed in bytes 2 and 0 */
+# x>>8 part
+ addu \scratch3, \scratch3, \scratch4 /* t = sum of both products */
+ preceu.ph.qbla \scratch4, \scratch3 /* t >> 8 */
+ addu \scratch3, \scratch3, \scratch4
+ addu \scratch3, \scratch3, \rounding_factor
+ and \scratch3, \scratch3, \andi_factor /* keep the high byte of each halfword in place (bytes 3 and 1) */
+
+ or \out_1, \scratch1, \scratch3 /* merge both halves into the packed result */
+.endm
+
#endif //QT_MIPS_DSP_H__