From f10356ead13e39c9501b84ee5e92efe722a2d2c7 Mon Sep 17 00:00:00 2001 From: lpapuga Date: Wed, 20 Nov 2013 17:09:57 +0100 Subject: MIPS DSP build system fix and additional optimizations. Changed MIPS DSP portion of the mkspecs/features/simd.prf file in order to fix the corrupted build system for MIPS platforms. List of the additionally optimized functions from file src/gui/painting/qdrawhelper.cpp: - qt_blend_rgb16_on_rgb16 - qt_fetchUntransformed_888 - qt_fetchUntransformed_444 - qt_fetchUntransformed_argb8565 from file src/gui/image/qimage.cpp: - convert_ARGB_to_ARGB_PM_inplace from file src/corelib/qstring.cpp: - ucstrncmp - toLatin1_helper - fromLatin1_helper Change-Id: I5c47a69784917eee29a8dbd2718828a390b27c93 Reviewed-by: Thiago Macieira --- src/corelib/corelib.pro | 14 + src/corelib/tools/qstring.cpp | 35 +++ src/corelib/tools/qstring_mips_dsp_asm.S | 449 +++++++++++++++++++++++++++++++ src/corelib/tools/tools.pri | 4 + 4 files changed, 502 insertions(+) create mode 100644 src/corelib/tools/qstring_mips_dsp_asm.S (limited to 'src/corelib') diff --git a/src/corelib/corelib.pro b/src/corelib/corelib.pro index b513149e7c..df28183fdc 100644 --- a/src/corelib/corelib.pro +++ b/src/corelib/corelib.pro @@ -111,3 +111,17 @@ ctest_qt5_module_files.files += $$ctest_macros_file.output $$cmake_extras_mkspec ctest_qt5_module_files.path = $$[QT_INSTALL_LIBS]/cmake/Qt5Core INSTALLS += ctest_qt5_module_files cmake_qt5_umbrella_module_files + +mips_dsp:*-g++* { + HEADERS += $$MIPS_DSP_HEADERS + + mips_dsp_corelib_assembler.commands = $$QMAKE_CXX -c + mips_dsp_corelib_assembler.commands += $(CXXFLAGS) $(INCPATH) -mips32r2 -mdsp ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT} + mips_dsp_corelib_assembler.dependency_type = TYPE_C + mips_dsp_corelib_assembler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)} + mips_dsp_corelib_assembler.input = MIPS_DSP_ASM + mips_dsp_corelib_assembler.variable_out = OBJECTS + mips_dsp_corelib_assembler.name = assembling[mips_dsp] ${QMAKE_FILE_IN} + silent:mips_dsp_corelib_assembler.commands = @echo assembling[mips_dsp] ${QMAKE_FILE_IN} && $$mips_dsp_corelib_assembler.commands + QMAKE_EXTRA_COMPILERS += mips_dsp_corelib_assembler +} diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 3976f2cb6f..d682207314 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -188,9 +188,23 @@ static int ucstricmp(const ushort *a, const ushort *ae, const uchar *b, const uc return 1; } +#if defined(__mips_dsp) +// From qstring_mips_dsp_asm.S +extern "C" int qt_ucstrncmp_mips_dsp_asm(const ushort *a, + const ushort *b, + unsigned len); +#endif + // Unicode case-sensitive compare two same-sized strings static int ucstrncmp(const QChar *a, const QChar *b, int l) { +#if defined(__mips_dsp) + if (l >= 8) { + return qt_ucstrncmp_mips_dsp_asm(reinterpret_cast(a), + reinterpret_cast(b), + l); + } +#endif // __mips_dsp while (l-- && *a == *b) a++,b++; if (l==-1) @@ -3937,6 +3951,10 @@ static inline __m128i mergeQuestionMarks(__m128i chunk) } #endif +#if defined(__mips_dsp) +extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const ushort *src, int length); +#endif + static QByteArray toLatin1_helper(const QChar *data, int length) { QByteArray ba; @@ -3989,10 +4007,14 @@ static QByteArray toLatin1_helper(const QChar *data, int length) length = length % 8; } #endif +#if defined(__mips_dsp) + qt_toLatin1_mips_dsp_asm(dst, src, length); +#else while (length--) { *dst++ = (*src>0xff) ? '?' : (uchar) *src; ++src; } +#endif } return ba; } @@ -4104,6 +4126,12 @@ QVector QString::toUcs4() const return v; } +#if defined(__mips_dsp) +// From qstring_mips_dsp_asm.S +extern "C" void qt_fromlatin1_mips_asm_unroll4 (ushort*, const char*, uint); +extern "C" void qt_fromlatin1_mips_asm_unroll8 (ushort*, const char*, uint); +#endif + QString::Data *QString::fromLatin1_helper(const char *str, int size) { Data *d; @@ -4144,8 +4172,15 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size) size = size % 16; } #endif +#if defined(__mips_dsp) + if (size > 20) + qt_fromlatin1_mips_asm_unroll8(dst, str, size); + else + qt_fromlatin1_mips_asm_unroll4(dst, str, size); +#else while (size--) *dst++ = (uchar)*str++; +#endif } return d; } diff --git a/src/corelib/tools/qstring_mips_dsp_asm.S b/src/corelib/tools/qstring_mips_dsp_asm.S new file mode 100644 index 0000000000..aee162c290 --- /dev/null +++ b/src/corelib/tools/qstring_mips_dsp_asm.S @@ -0,0 +1,449 @@ +/**************************************************************************** +** +** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com +** Contact: http://www.qt-project.org/legal +** +** This file is part of the QtGui module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and Digia. For licensing terms and +** conditions see http://qt.digia.com/licensing. For further information +** use the contact form at http://qt.digia.com/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Digia gives you certain additional +** rights. These rights are described in the Digia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "../../gui/painting/qt_mips_asm_dsp_p.h" + +.macro EXTRACT_HALVES_x2 src1, src2, hi_1, hi_2 +#if defined(__MIPSEL) && __MIPSEL + srl \hi_1, \src1, 16 + ext \src1, \src1, 0, 16 + srl \hi_2, \src2, 16 + ext \src2, \src2, 0, 16 +#else + ext \hi_1, \src1, 0, 16 + srl \src1, \src1, 16 + ext \hi_2, \src2, 0, 16 + srl \src2, \src2, 16 +#endif +.endm + + +LEAF_MIPS_DSP(qt_ucstrncmp_mips_dsp_asm) +/* + * Arguments: + * a0 - string_a (uint16_t*) + * a1 - string_b (uint16_t*) + * a2 - length (uint32_t) + * + * Register usage: + * t0 - batches + */ + + move v0, zero /* result = 0 */ + andi t0, a0, 0x3 /* t1 = string_a % 4 */ + andi t1, a1, 0x3 /* t0 = string_b % 4 */ + or t2, t0, t1 /* t2 = t0 | t1 */ + + beqz t2, 5f /* both aligned */ + and t2, t0 ,t1 + beqz t2, 6f /* one aligned */ + nop + + /* + * Both strings are unaligned: read 1 halfword from each, + * then fall-off to continue with the both-aligned case. + */ + lhu t0, 0 (a0) + lhu t1, 0 (a1) + addiu a2, a2, -1 /* len-- */ + sub v0, t0, t1 /* v0 = t0-t1 */ + addiu a0, a0, 2 /* string_a++ */ + bnez v0, 0f /* if (t0-t1): return */ + addiu a1, a1, 2 /* string_b++ */ + beqz a2, 0f /* if !len: return */ + /* next instruction (srl) fills delay branch slot */ + +5: /* Both string pointers are aligned */ + srl t0, a2, 3 /* batches = length / 8 */ + beqz t0, 9f /* if !batches: tail */ + andi a2, a2, 0x7 /* length = length % 8 */ + + SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 + +1: lw t1, 0 (a0) /* [a0 a1] */ + lw t3, 4 (a0) /* [a2 a3] */ + lw t5, 8 (a0) /* [a4 a5] */ + lw t7, 12 (a0) /* [a6 a7] */ + + lw t2, 0 (a1) /* [b0 b1] */ + lw t4, 4 (a1) /* [b2 b3] */ + lw t6, 8 (a1) /* [b4 b5] */ + lw t8, 12 (a1) /* [b6 b7] */ + + /* + * Subtract elements one by one, if the result is zero + * both halves of the registers (shorts) are equal. + */ + subq.ph s0, t1, t2 /* [a0-b0 a1-b1] */ + subq.ph s1, t3, t4 /* [a2-b2 a3-b3] */ + + bnez s0, 1f + subq.ph s2, t5, t6 /* [a4-b4 a5-b5] */ + bnez s1, 2f + subq.ph s3, t7, t8 /* [a6-b6 a7-b7] */ + bnez s2, 3f + addiu t0, t0, -1 /* batches-- */ + bnez s3, 4f + addiu a0, a0, 8*2 /* string_a += 8 */ + + bnez t0, 1b /* if batches: loop */ + addiu a1, a1, 8*2 /* string_b += 8 */ + + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 + + bnez a2, 9f /* if length: tail */ + nop + jr ra + nop + + + 1: /* Check t1 [a0 a1] vs. t2 [b0 b1] */ + EXTRACT_HALVES_x2 t1, t2, t3, t4 /* a0, b0, a1, b1 */ + sub v0, t1, t2 + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 + bnez v0, 0f /* if (a0-b0): return */ + nop + jr ra + sub v0, t3, t4 /* return a1-b1 */ + + 2: /* Check t3 [a2 a3] vs. t4 [b2 b3] */ + EXTRACT_HALVES_x2 t3, t4, t1, t2 /* a2, b2, a3, b3 */ + sub v0, t3, t4 + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 + bnez v0, 0f /* if (a2-b2): return */ + nop + jr ra + sub v0, t1, t2 /* return a3-b3 */ + + 3: /* Check t5 [a4 a5] vs. t6 [b4 b5] */ + EXTRACT_HALVES_x2 t5, t6, t1, t2 /* a4, b4, a5, b5 */ + sub v0, t5, t6 + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 + bnez v0, 0f /* if (a4-b4): return */ + nop + jr ra + sub v0, t1, t2 /* return a5-b5 */ + + 4: /* Check t7 [a6 a7] vs. t8 [b6 b7] */ + EXTRACT_HALVES_x2 t7, t8, t1, t2 /* a6, b6, a7, b7 */ + sub v0, t7, t8 + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 + bnez v0, 0f /* if (a6-b6): return */ + nop + jr ra + sub v0, t1, t2 + + + /* Process remaining tail items. */ +9: lhu t1, 0 (a0) /* a */ + lhu t2, 0 (a1) /* b */ + addiu a2, a2, -1 /* length-- */ + sub v0, t1, t2 /* result = (a - b) */ + + bnez v0, 0f /* if (a - b): return */ + addiu a0, a0, 2 /* string_a++ */ + + bnez a2, 9b /* if length: loop */ + addiu a1, a1, 2 /* string_b++ */ + +0: jr ra + nop + + + /* One of the inputs is unaligned, do unrolled half-word loads */ +6: srl t0, a2, 3 /* batches = length / 8 */ + andi a2, a2, 0x7 /* length = length % 8 */ + +1: lhu t1, 0 (a0) + lhu t2, 0 (a1) + lhu t3, 2 (a0) + lhu t4, 2 (a1) + lhu t5, 4 (a0) + lhu t6, 4 (a1) + lhu t7, 6 (a0) + lhu t8, 6 (a1) + + sub v0, t1, t2 + sub t1, t3, t4 + + bnez v0, 0f + sub t2, t5, t6 + bnez t1, 2f + sub t3, t7, t8 + bnez t2, 3f + lhu t1, 8 (a0) + bnez t3, 4f + lhu t2, 8 (a1) + + lhu t3, 10 (a0) + lhu t4, 10 (a1) + lhu t5, 12 (a0) + lhu t6, 12 (a1) + lhu t7, 14 (a0) + lhu t8, 14 (a1) + + sub v0, t1, t2 + sub t1, t3, t4 + + bnez v0, 0f + sub t2, t5, t6 + bnez t1, 2f + sub t3, t7, t8 + bnez t2, 3f + addiu t0, t0, -1 /* batches-- */ + bnez t3, 4f + addiu a0, a0, 8*2 /* string_a += 8 */ + + bnez t0, 1b + addiu a1, a1, 8*2 /* string_b += 8 */ + + bnez a2, 9b /* if length: tail */ + nop + +0: jr ra + nop +2: jr ra + move v0, t1 +3: jr ra + move v0, t2 +4: jr ra + move v0, t3 + +END(qt_ucstrncmp_mips_dsp_asm) + + +#if defined(__mips_dspr2) +LEAF_MIPS_DSPR2(qt_fromlatin1_mips_asm_unroll8) +#else +LEAF_MIPS_DSP(qt_fromlatin1_mips_asm_unroll8) +#endif + + andi t0, a1, 0x3 + beqz t0, 9f /* check that src is aligned */ + nop + +1: lbu t1, 0 (a1) + addiu a1, a1, 1 + addiu a2, a2, -1 + sh t1, 0 (a0) + beqz a2, 0f + andi t0, a1, 0x3 + bnez t0, 1b + addiu a0, a0, 2 + +9: /* source pointer is aligned: do batches of 8 elements */ + andi t0, a0, 3 /* check if dst is aligned */ + bnez t0, 6f + srl t0, a2, 3 /* batches = len / 8 */ + andi a2, a2, 0x7 /* tail = len % 8 */ + + beqz t0, 8f /* if !batches: tail */ + nop + +1: lw t1, 0 (a1) + lw t2, 4 (a1) + + addiu a1, a1, 8*1 + addiu t0, t0, -1 + + preceu.ph.qbl t3, t1 + preceu.ph.qbr t1, t1 + preceu.ph.qbl t4, t2 + preceu.ph.qbr t2, t2 + +#if defined(__MIPSEL) && __MIPSEL + sw t1, 0 (a0) + sw t3, 4 (a0) + sw t2, 8 (a0) + sw t4, 12 (a0) +#else + sw t3, 0 (a0) + sw t1, 4 (a0) + sw t4, 8 (a0) + sw t2, 12 (a0) +#endif + + bnez t0, 1b + addiu a0, a0, 8*2 + +8: /* process tail items */ + beqz a2, 0f + nop + +1: lbu t1, 0 (a1) + addiu a2, a2, -1 + sh t1, 0 (a0) + addiu a1, a1, 1 + bnez a2, 1b + addiu a0, a0, 2 + +0: jr ra + nop + +6: beqz t0, 8b + andi a2, a2, 7 +7: lw t1, 0(a1) + lw t2, 4(a1) + addiu t0, t0, -1 + addiu a1, a1, 8 + andi t3, t1, 0xff +#if defined(__mips_dspr2) + prepend t1, t2, 8 +#else + sll t4, t4, 24 + srl t1, t1, 8 + or t1, t1, t4 +#endif + srl t2, t2, 8 + preceu.ph.qbr t4, t1 + preceu.ph.qbl t1, t1 + preceu.ph.qbr t5, t2 + srl t2, t2, 16 + sh t3, 0(a0) + sw t4, 2(a0) + sw t1, 6(a0) + sw t5, 10(a0) + sh t2, 14(a0) + bnez t0, 7b + addiu a0, a0, 16 + bnez a2, 1b + nop + + jr ra + nop + +END(qt_fromlatin1_mips_asm_unroll8) + + +LEAF_MIPS_DSP(qt_fromlatin1_mips_asm_unroll4) +/* + * Arguments: + * a0 - dst (uint16_t*) + * a1 - src (const char*) + * a2 - len (unsigned int) + */ + + /* + * QString::fromLatin1_helper() already handles the len==0 + * case: assume that len is never zero. + */ + srl t0, a2, 2 + beqz t0, 9f + andi a2, a2, 0x3 + +1: lbu t1, 0(a1) + lbu t2, 1(a1) + lbu t3, 2(a1) + lbu t4, 3(a1) + sh t1, 0(a0) + sh t2, 2(a0) + sh t3, 4(a0) + sh t4, 6(a0) + addiu t0, t0, -1 + addiu a1, a1, 4 + bnez t0, 1b + addiu a0, a0, 8 + +8: beqz a2, 0f + nop + +9: lbu t1, 0(a1) + addiu a2, a2, -1 + addiu a1, a1, 1 + sh t1, 0(a0) + bnez a2, 9b + addiu a0, a0, 2 + +0: jr ra + nop + +END(qt_fromlatin1_mips_asm_unroll4) + + +LEAF_MIPS_DSP(qt_toLatin1_mips_dsp_asm) + /* + * a0 - dst + * a1 - src + * a2 - length + */ + + addiu t9, zero, 0x3f + srl t8, a2, 2 + beqz t8, 2f + andi a2, a2, 3 +1: + lhu t0, 0(a1) + lhu t1, 2(a1) + lhu t2, 4(a1) + lhu t3, 6(a1) + srl t4, t0, 8 + srl t5, t1, 8 + srl t6, t2, 8 + srl t7, t3, 8 + movn t0, t9, t4 + movn t1, t9, t5 + movn t2, t9, t6 + movn t3, t9, t7 + addiu a1, a1, 8 + addiu t8, t8, -1 + sb t0, 0(a0) + sb t1, 1(a0) + sb t2, 2(a0) + sb t3, 3(a0) + bgtz t8, 1b + addiu a0, a0, 4 +2: beqz a2, 4f + nop +3: + lhu t0, 0(a1) + addiu a1, a1, 2 + addiu a2, a2, -1 + srl t1, t0, 8 + movn t0, t9, t1 + sb t0, 0(a0) + bgtz a2, 3b + addiu a0, a0, 1 +4: + jr ra + nop + +END(qt_toLatin1_mips_dsp_asm) + diff --git a/src/corelib/tools/tools.pri b/src/corelib/tools/tools.pri index e4a7b02aee..cac596f0bc 100644 --- a/src/corelib/tools/tools.pri +++ b/src/corelib/tools/tools.pri @@ -192,3 +192,7 @@ INCLUDEPATH += ../3rdparty/md5 \ !macx-icc:!vxworks:unix:LIBS_PRIVATE += -lm TR_EXCLUDE += ../3rdparty/* + +# MIPS DSP +MIPS_DSP_ASM += tools/qstring_mips_dsp_asm.S +MIPS_DSP_HEADERS += ../gui/painting/qt_mips_asm_dsp_p.h -- cgit v1.2.3