diff options
-rw-r--r-- | src/corelib/tools/qbytearray.cpp | 119 | ||||
-rw-r--r-- | tests/auto/corelib/tools/qbytearray/tst_qbytearray.cpp | 2 | ||||
-rw-r--r-- | tests/benchmarks/corelib/tools/qbytearray/main.cpp | 189 | ||||
-rw-r--r-- | tests/benchmarks/corelib/tools/qbytearray/qbytearray.pro | 1 |
4 files changed, 290 insertions, 21 deletions
diff --git a/src/corelib/tools/qbytearray.cpp b/src/corelib/tools/qbytearray.cpp index 088a7c1769..150da82cb8 100644 --- a/src/corelib/tools/qbytearray.cpp +++ b/src/corelib/tools/qbytearray.cpp @@ -62,6 +62,64 @@ QT_BEGIN_NAMESPACE +// Latin 1 case system: +/* +#!/usr/bin/perl -l +use feature "unicode_strings"; +for (0..255) { + $up = uc(chr($_)); + $up = chr($_) if ord($up) > 0x100 || length $up > 1; + printf "0x%02x,", ord($up); + print "" if ($_ & 0xf) == 0xf; +} +*/ +static const uchar latin1_uppercased[256] = { + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, + 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, + 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, + 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, + 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f, + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, + 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, + 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, + 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, + 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, + 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, + 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xf7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xff +}; + +/* +#!/usr/bin/perl -l +use feature "unicode_strings"; +for (0..255) { + $up = lc(chr($_)); + $up = chr($_) if ord($up) > 0x100 || length $up > 1; + printf "0x%02x,", ord($up); + print "" if ($_ & 0xf) == 0xf; +} +*/ +static const uchar latin1_lowercased[256] = { + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, + 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f, + 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, + 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, + 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, + 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf, + 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff +}; int qFindByteArray( const char *haystack0, int haystackLen, int from, @@ -2685,6 +2743,8 @@ QByteArray QByteArray::mid(int pos, int len) const } /*! + \fn QByteArray QByteArray::toLower() const + Returns a lowercase copy of the byte array. The bytearray is interpreted as a Latin-1 encoded string. @@ -2693,20 +2753,49 @@ QByteArray QByteArray::mid(int pos, int len) const \sa toUpper(), {8-bit Character Comparisons} */ -QByteArray QByteArray::toLower() const + +// noinline so that the compiler won't inline the function in each of +// toLower and toUpper when the only difference is the table being used +// (even with constant propagation, there's no gain in performance). +template <typename T> +#ifdef Q_CC_MSVC +__declspec(noinline) +#elif defined(Q_CC_GNU) +__attribute__((noinline)) +#endif +static QByteArray toCase_template(T &input, const uchar * table) { - QByteArray s(*this); - uchar *p = reinterpret_cast<uchar *>(s.data()); - uchar *e = reinterpret_cast<uchar *>(s.end()); - if (p) { - while (p != e) { - *p = QChar::toLower((ushort)*p); - p++; - } + // find the first bad character in input + const char *orig_begin = input.constBegin(); + const char *firstBad = orig_begin; + const char *e = input.constEnd(); + for ( ; firstBad != e ; ++firstBad) { + uchar ch = uchar(*firstBad); + uchar converted = table[ch]; + if (ch != converted) + break; + } + + if (firstBad == e) + return input; + + // transform the rest + QByteArray s = input; + char *b = s.begin(); // will detach if necessary + char *p = b + (firstBad - orig_begin); + e = b + s.size(); + for ( ; p != e; ++p) { + *p = char(uchar(table[uchar(*p)])); } return s; } + +QByteArray QByteArray::toLower() const +{ + return toCase_template(*this, latin1_lowercased); +} + /*! Returns an uppercase copy of the byte array. The bytearray is interpreted as a Latin-1 encoded string. @@ -2716,19 +2805,9 @@ QByteArray QByteArray::toLower() const \sa toLower(), {8-bit Character Comparisons} */ - QByteArray QByteArray::toUpper() const { - QByteArray s(*this); - uchar *p = reinterpret_cast<uchar *>(s.data()); - uchar *e = reinterpret_cast<uchar *>(s.end()); - if (p) { - while (p != e) { - *p = QChar::toUpper((ushort)*p); - p++; - } - } - return s; + return toCase_template(*this, latin1_uppercased); } /*! \fn void QByteArray::clear() diff --git a/tests/auto/corelib/tools/qbytearray/tst_qbytearray.cpp b/tests/auto/corelib/tools/qbytearray/tst_qbytearray.cpp index 52e1850c87..8670e4b3ef 100644 --- a/tests/auto/corelib/tools/qbytearray/tst_qbytearray.cpp +++ b/tests/auto/corelib/tools/qbytearray/tst_qbytearray.cpp @@ -2022,6 +2022,8 @@ void tst_QByteArray::toUpperLower() QFETCH(QByteArray, input); QFETCH(QByteArray, upper); QFETCH(QByteArray, lower); + QCOMPARE(lower.toLower(), lower); + QCOMPARE(upper.toUpper(), upper); QCOMPARE(input.toUpper(), upper); QCOMPARE(input.toLower(), lower); } diff --git a/tests/benchmarks/corelib/tools/qbytearray/main.cpp b/tests/benchmarks/corelib/tools/qbytearray/main.cpp index 009ffa12e3..830910d664 100644 --- a/tests/benchmarks/corelib/tools/qbytearray/main.cpp +++ b/tests/benchmarks/corelib/tools/qbytearray/main.cpp @@ -49,11 +49,25 @@ class tst_qbytearray : public QObject { Q_OBJECT + QByteArray sourcecode; private slots: + void initTestCase(); void append(); void append_data(); + + void latin1Uppercasing_qt54(); + void latin1Uppercasing_xlate(); + void latin1Uppercasing_xlate_checked(); + void latin1Uppercasing_category(); + void latin1Uppercasing_bitcheck(); }; +void tst_qbytearray::initTestCase() +{ + QFile self(QFINDTESTDATA("main.cpp")); + QVERIFY(self.open(QIODevice::ReadOnly)); + sourcecode = self.readAll(); +} void tst_qbytearray::append_data() { @@ -81,6 +95,181 @@ void tst_qbytearray::append() } } +void tst_qbytearray::latin1Uppercasing_qt54() +{ + QByteArray s = sourcecode; + s.detach(); + + // the following was copied from qbytearray.cpp (except for the QBENCHMARK macro): + uchar *p_orig = reinterpret_cast<uchar *>(s.data()); + uchar *e = reinterpret_cast<uchar *>(s.end()); + + QBENCHMARK { + uchar *p = p_orig; + if (p) { + while (p != e) { + *p = QChar::toLower((ushort)*p); + p++; + } + } + } +} + + +/* +#!/usr/bin/perl -l +use feature "unicode_strings" +for (0..255) { + $up = uc(chr($_)); + $up = chr($_) if ord($up) > 0x100 || length $up > 1; + printf "0x%02x,", ord($up); + print "" if ($_ & 0xf) == 0xf; +} +*/ +static const uchar uppercased[256] = { + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, + 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, + 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, + 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, + 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f, + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, + 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, + 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, + 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, + 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, + 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, + 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xf7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xff +}; +void tst_qbytearray::latin1Uppercasing_xlate() +{ + QByteArray output = sourcecode; + output.detach(); + char *dst_orig = output.data(); + const char *src_orig = sourcecode.constBegin(); + const char *end = sourcecode.constEnd(); + QBENCHMARK { + char *dst = dst_orig; + for (const char *src = src_orig; src != end; ++src, ++dst) + *dst = uppercased[uchar(*src)]; + } +} + +void tst_qbytearray::latin1Uppercasing_xlate_checked() +{ + QByteArray output = sourcecode; + output.detach(); + char *dst_orig = output.data(); + const char *src_orig = sourcecode.constBegin(); + const char *end = sourcecode.constEnd(); + QBENCHMARK { + char *dst = dst_orig; + for (const char *src = src_orig; src != end; ++src, ++dst) { + uchar ch = uchar(*src); + uchar converted = uppercased[ch]; + if (ch != converted) + *dst = converted; + } + } +} + +/* +#!/bin/perl -l +use feature "unicode_strings"; +sub categorize($) { + # 'ß' and 'ÿ' are lowercase, but we cannot uppercase them + return 0 if $_[0] == 0xDF || $_[0] == 0xFF; + $ch = chr($_[0]); + return 2 if uc($ch) ne $ch; + return 1 if lc($ch) ne $ch; + return 0; +} +for (0..255) { + printf "%d,", categorize($_); + print "" if ($_ & 0xf) == 0xf; +} +*/ +static const char categories[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, + 0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,0 +}; + +void tst_qbytearray::latin1Uppercasing_category() +{ + QByteArray output = sourcecode; + output.detach(); + char *dst_orig = output.data(); + const char *src_orig = sourcecode.constBegin(); + const char *end = sourcecode.constEnd(); + QBENCHMARK { + char *dst = dst_orig; + for (const char *src = src_orig; src != end; ++src, ++dst) + *dst = categories[uchar(*src)] == 1 ? *src & ~0x20 : *src; + } +} + +/* +#!/bin/perl -l +use feature "unicode_strings"; +sub categorize($) { + # 'ß' and 'ÿ' are lowercase, but we cannot uppercase them + return 0 if $_[0] == 0xDF || $_[0] == 0xFF; + $ch = chr($_[0]); + return 2 if uc($ch) ne $ch; + return 1 if lc($ch) ne $ch; + return 0; +} +for $row (0..7) { + $val = 0; + for $col (0..31) { + $val |= (1<<$col) + if categorize($row * 31 + $col) == 2; + } + printf "0x%08x,", $val; +} +*/ + +static const quint32 shouldUppercase[8] = { + 0x00000000,0x00000000,0x00000000,0x3ffffff0,0x00000000,0x04000000,0x00000000,0xbfffff80 +}; + +static bool bittest(const quint32 *data, uchar bit) +{ + static const unsigned bitsperelem = sizeof(*data) * CHAR_BIT; + return data[bit / bitsperelem] & (1 << (bit & (bitsperelem - 1))); +} + +void tst_qbytearray::latin1Uppercasing_bitcheck() +{ + QByteArray output = sourcecode; + output.detach(); + char *dst_orig = output.data(); + const char *src_orig = sourcecode.constBegin(); + const char *end = sourcecode.constEnd(); + QBENCHMARK { + char *dst = dst_orig; + for (const char *src = src_orig; src != end; ++src, ++dst) + *dst = bittest(shouldUppercase, *src) ? uchar(*src) & ~0x20 : uchar(*src); + } +} + QTEST_MAIN(tst_qbytearray) diff --git a/tests/benchmarks/corelib/tools/qbytearray/qbytearray.pro b/tests/benchmarks/corelib/tools/qbytearray/qbytearray.pro index 14bf1d8272..0d5e7646ad 100644 --- a/tests/benchmarks/corelib/tools/qbytearray/qbytearray.pro +++ b/tests/benchmarks/corelib/tools/qbytearray/qbytearray.pro @@ -2,7 +2,6 @@ TEMPLATE = app TARGET = tst_bench_qbytearray QT = core testlib -CONFIG += release SOURCES += main.cpp DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0 |