Unify and optimize QByteArray::to{Upper,Lower}

First check whether the data needs transforming before doing the transform. This means we won't detach when transforming data that is already correct. And instead of using QChar, use our own hand-rolled table. In a proper LTO build, the QChar calls would be resolved to a lookup of the Unicode data, but not many people do LTO builds. Therefore, a great speed-up is achieved by simply avoiding the function call. The extra gain in performance comes from the simpler translation table instead of the more complex full-Unicode data. Also as a consequence, this changes the handling of two characters in Latin 1: 'ß' should be uppercased to "SS" but we won't do it, and 'ÿ' can't be uppercased in Latin 1 ('Ÿ' is outside the range). Benchmarking is included. Compared to the Qt 5.4 algorithm, the new code is almost 20x faster. Other alternatives are included in the benchmark and are all faster than the current code, though slower than the new one. While all of them could compress the tables to be smaller or shared between uppercasing and lowercasing, they would also expand to more code (though probably less than the extra bytes required in the full translation table). In the trade-off, I decided to go with simplicity and most efficient code. Change-Id: I002d98318d236de0d27ffbea39d662cbed359985 Reviewed-by: Marc Mutz <marc.mutz@kdab.com>
author: Thiago Macieira <thiago.macieira@intel.com> 2014-07-29 14:35:11 -0700
committer: Thiago Macieira <thiago.macieira@intel.com> 2014-08-19 03:39:05 +0200
commit: c250a0ec3a196247dde372cde3757913226386b0 (patch)
tree: b94c2b1c39cbd45d8ac351243f761503f34f5758
parent: f7af3e61716691a46c872c33edaf44481428abc0 (diff)
4 files changed, 290 insertions, 21 deletions
diff --git a/src/corelib/tools/qbytearray.cpp b/src/corelib/tools/qbytearray.cpp
index 088a7c1769..150da82cb8 100644
--- a/src/corelib/tools/qbytearray.cpp
+++ b/src/corelib/tools/qbytearray.cpp
@@ -62,6 +62,64 @@
 
 QT_BEGIN_NAMESPACE
 
+// Latin 1 case system:
+/*
+#!/usr/bin/perl -l
+use feature "unicode_strings";
+for (0..255) {
+    $up = uc(chr($_));
+    $up = chr($_) if ord($up) > 0x100 || length $up > 1;
+    printf "0x%02x,", ord($up);
+    print "" if ($_ & 0xf) == 0xf;
+}
+*/
+static const uchar latin1_uppercased[256] = {
+    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
+    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
+    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
+    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
+    0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
+    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
+    0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
+    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f,
+    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
+    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
+    0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
+    0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
+    0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
+    0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
+    0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
+    0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xf7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xff
+};
+
+/*
+#!/usr/bin/perl -l
+use feature "unicode_strings";
+for (0..255) {
+    $up = lc(chr($_));
+    $up = chr($_) if ord($up) > 0x100 || length $up > 1;
+    printf "0x%02x,", ord($up);
+    print "" if ($_ & 0xf) == 0xf;
+}
+*/
+static const uchar latin1_lowercased[256] = {
+    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
+    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
+    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
+    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
+    0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
+    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f,
+    0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
+    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
+    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
+    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
+    0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
+    0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
+    0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
+    0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf,
+    0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
+    0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+};
 
 int qFindByteArray(
     const char *haystack0, int haystackLen, int from,
@@ -2685,6 +2743,8 @@ QByteArray QByteArray::mid(int pos, int len) const
 }
 
 /*!
+    \fn QByteArray QByteArray::toLower() const
+
     Returns a lowercase copy of the byte array. The bytearray is
     interpreted as a Latin-1 encoded string.
 
@@ -2693,20 +2753,49 @@ QByteArray QByteArray::mid(int pos, int len) const
 
     \sa toUpper(), {8-bit Character Comparisons}
 */
-QByteArray QByteArray::toLower() const
+
+// noinline so that the compiler won't inline the function in each of
+// toLower and toUpper when the only difference is the table being used
+// (even with constant propagation, there's no gain in performance).
+template <typename T>
+#ifdef Q_CC_MSVC
+__declspec(noinline)
+#elif defined(Q_CC_GNU)
+__attribute__((noinline))
+#endif
+static QByteArray toCase_template(T &input, const uchar * table)
 {
-    QByteArray s(*this);
-    uchar *p = reinterpret_cast<uchar *>(s.data());
-    uchar *e = reinterpret_cast<uchar *>(s.end());
-    if (p) {
-        while (p != e) {
-            *p = QChar::toLower((ushort)*p);
-            p++;
-        }
+    // find the first character that the table would change ("bad" = needs converting)
+    const char *orig_begin = input.constBegin();
+    const char *firstBad = orig_begin;
+    const char *e = input.constEnd();
+    for ( ; firstBad != e ; ++firstBad) {
+        uchar ch = uchar(*firstBad);
+        uchar converted = table[ch];
+        if (ch != converted)
+            break;
+    }
+
+    if (firstBad == e) // every character already maps to itself: nothing to convert
+        return input;
+
+    // copy the input and convert from the first changed character to the end
+    QByteArray s = input;
+    char *b = s.begin();            // will detach if necessary
+    char *p = b + (firstBad - orig_begin); // same offset as firstBad, but inside s
+    e = b + s.size();
+    for ( ; p != e; ++p) {
+        *p = char(uchar(table[uchar(*p)]));
     }
     return s;
 }
 
+
+QByteArray QByteArray::toLower() const
+{
+    return toCase_template(*this, latin1_lowercased);
+}
+
 /*!
     Returns an uppercase copy of the byte array. The bytearray is
     interpreted as a Latin-1 encoded string.
@@ -2716,19 +2805,9 @@ QByteArray QByteArray::toLower() const
 
     \sa toLower(), {8-bit Character Comparisons}
 */
-
 QByteArray QByteArray::toUpper() const
 {
-    QByteArray s(*this);
-    uchar *p = reinterpret_cast<uchar *>(s.data());
-    uchar *e = reinterpret_cast<uchar *>(s.end());
-    if (p) {
-        while (p != e) {
-            *p = QChar::toUpper((ushort)*p);
-            p++;
-        }
-    }
-    return s;
+    return toCase_template(*this, latin1_uppercased);
 }
 
 /*! \fn void QByteArray::clear()
diff --git a/tests/auto/corelib/tools/qbytearray/tst_qbytearray.cpp b/tests/auto/corelib/tools/qbytearray/tst_qbytearray.cpp
index 52e1850c87..8670e4b3ef 100644
--- a/tests/auto/corelib/tools/qbytearray/tst_qbytearray.cpp
+++ b/tests/auto/corelib/tools/qbytearray/tst_qbytearray.cpp
@@ -2022,6 +2022,8 @@ void tst_QByteArray::toUpperLower()
     QFETCH(QByteArray, input);
     QFETCH(QByteArray, upper);
     QFETCH(QByteArray, lower);
+    QCOMPARE(lower.toLower(), lower);
+    QCOMPARE(upper.toUpper(), upper);
     QCOMPARE(input.toUpper(), upper);
     QCOMPARE(input.toLower(), lower);
 }
diff --git a/tests/benchmarks/corelib/tools/qbytearray/main.cpp b/tests/benchmarks/corelib/tools/qbytearray/main.cpp
index 009ffa12e3..830910d664 100644
--- a/tests/benchmarks/corelib/tools/qbytearray/main.cpp
+++ b/tests/benchmarks/corelib/tools/qbytearray/main.cpp
@@ -49,11 +49,25 @@
 class tst_qbytearray : public QObject
 {
     Q_OBJECT
+    QByteArray sourcecode;
 private slots:
+    void initTestCase();
     void append();
     void append_data();
+
+    void latin1Uppercasing_qt54();
+    void latin1Uppercasing_xlate();
+    void latin1Uppercasing_xlate_checked();
+    void latin1Uppercasing_category();
+    void latin1Uppercasing_bitcheck();
 };
 
+void tst_qbytearray::initTestCase()
+{
+    QFile self(QFINDTESTDATA("main.cpp"));
+    QVERIFY(self.open(QIODevice::ReadOnly));
+    sourcecode = self.readAll();
+}
 
 void tst_qbytearray::append_data()
 {
@@ -81,6 +95,181 @@ void tst_qbytearray::append()
     }
 }
 
+void tst_qbytearray::latin1Uppercasing_qt54()
+{
+    QByteArray s = sourcecode;
+    s.detach();
+
+    // Qt 5.4 loop of QByteArray::toUpper() from qbytearray.cpp (only the QBENCHMARK macro added):
+    uchar *p_orig = reinterpret_cast<uchar *>(s.data());
+    uchar *e = reinterpret_cast<uchar *>(s.end());
+
+    QBENCHMARK {
+        uchar *p = p_orig;
+        if (p) {
+            while (p != e) {
+                *p = QChar::toUpper((ushort)*p); // uppercase, matching the benchmark's name
+                p++;
+            }
+        }
+    }
+}
+
+
+/*
+#!/usr/bin/perl -l
+use feature "unicode_strings";
+for (0..255) {
+    $up = uc(chr($_));
+    $up = chr($_) if ord($up) > 0x100 || length $up > 1;
+    printf "0x%02x,", ord($up);
+    print "" if ($_ & 0xf) == 0xf;
+}
+*/
+static const uchar uppercased[256] = {
+    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
+    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
+    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
+    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
+    0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
+    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
+    0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
+    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f,
+    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
+    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
+    0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
+    0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
+    0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
+    0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
+    0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
+    0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xf7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xff
+};
+void tst_qbytearray::latin1Uppercasing_xlate()
+{
+    QByteArray output = sourcecode;
+    output.detach();
+    char *dst_orig = output.data();
+    const char *src_orig = sourcecode.constBegin();
+    const char *end = sourcecode.constEnd();
+    QBENCHMARK {
+        char *dst = dst_orig;
+        for (const char *src = src_orig; src != end; ++src, ++dst)
+            *dst = uppercased[uchar(*src)];
+    }
+}
+
+void tst_qbytearray::latin1Uppercasing_xlate_checked()
+{
+    QByteArray output = sourcecode;
+    output.detach();
+    char *dst_orig = output.data();
+    const char *src_orig = sourcecode.constBegin();
+    const char *end = sourcecode.constEnd();
+    QBENCHMARK {
+        char *dst = dst_orig;
+        for (const char *src = src_orig; src != end; ++src, ++dst) {
+            uchar ch = uchar(*src);
+            uchar converted = uppercased[ch];
+            if (ch != converted)
+                *dst = converted;
+        }
+    }
+}
+
+/*
+#!/bin/perl -l
+use feature "unicode_strings";
+sub categorize($) {
+    # 'µ', 'ß' and 'ÿ' are lowercase, but we cannot uppercase them in Latin 1
+    return 0 if $_[0] == 0xB5 || $_[0] == 0xDF || $_[0] == 0xFF;
+    $ch = chr($_[0]);
+    return 2 if uc($ch) ne $ch;
+    return 1 if lc($ch) ne $ch;
+    return 0;
+}
+for (0..255) {
+    printf "%d,", categorize($_);
+    print "" if ($_ & 0xf) == 0xf;
+}
+*/
+static const char categories[256] = { // 0: no case change, 1: uppercase, 2: lowercase (can be uppercased)
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
+    0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,0
+};
+
+void tst_qbytearray::latin1Uppercasing_category()
+{
+    QByteArray output = sourcecode;
+    output.detach();
+    char *dst_orig = output.data();
+    const char *src_orig = sourcecode.constBegin();
+    const char *end = sourcecode.constEnd();
+    QBENCHMARK {
+        char *dst = dst_orig;
+        for (const char *src = src_orig; src != end; ++src, ++dst) // clear bit 0x20 only on category 2
+            *dst = categories[uchar(*src)] == 2 ? *src & ~0x20 : *src;
+    }
+}
+
+/*
+#!/bin/perl -l
+use feature "unicode_strings";
+sub categorize($) {
+    # 'µ', 'ß' and 'ÿ' are lowercase, but we cannot uppercase them in Latin 1
+    return 0 if $_[0] == 0xB5 || $_[0] == 0xDF || $_[0] == 0xFF;
+    $ch = chr($_[0]);
+    return 2 if uc($ch) ne $ch;
+    return 1 if lc($ch) ne $ch;
+    return 0;
+}
+for $row (0..7) {
+    $val = 0;
+    for $col (0..31) {
+        $val |= (1<<$col)
+            if categorize($row * 32 + $col) == 2;
+    }
+    printf "0x%08x,", $val;
+}
+*/
+
+static const quint32 shouldUppercase[8] = { // bit (c & 31) of word (c / 32) is set when byte c must be uppercased
+    0x00000000,0x00000000,0x00000000,0x07fffffe,0x00000000,0x00000000,0x00000000,0x7f7fffff
+};
+
+static bool bittest(const quint32 *data, uchar bit) // true if bit number 'bit' is set in the bitmap 'data'
+{
+    static const unsigned bitsperelem = sizeof(*data) * CHAR_BIT; // bits held by one array element
+    return data[bit / bitsperelem] & (1U << (bit & (bitsperelem - 1))); // 1U: shifting by 31 must not reach a sign bit
+}
+
+void tst_qbytearray::latin1Uppercasing_bitcheck()
+{
+    QByteArray output = sourcecode;
+    output.detach();
+    char *dst_orig = output.data();
+    const char *src_orig = sourcecode.constBegin();
+    const char *end = sourcecode.constEnd();
+    QBENCHMARK {
+        char *dst = dst_orig;
+        for (const char *src = src_orig; src != end; ++src, ++dst)
+            *dst = bittest(shouldUppercase, *src) ? uchar(*src) & ~0x20 : uchar(*src);
+    }
+}
+
 
 QTEST_MAIN(tst_qbytearray)
 
diff --git a/tests/benchmarks/corelib/tools/qbytearray/qbytearray.pro b/tests/benchmarks/corelib/tools/qbytearray/qbytearray.pro
index 14bf1d8272..0d5e7646ad 100644
--- a/tests/benchmarks/corelib/tools/qbytearray/qbytearray.pro
+++ b/tests/benchmarks/corelib/tools/qbytearray/qbytearray.pro
@@ -2,7 +2,6 @@ TEMPLATE = app
 TARGET = tst_bench_qbytearray
 
 QT = core testlib
-CONFIG += release
 
 SOURCES += main.cpp
 DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0
author	Thiago Macieira <thiago.macieira@intel.com>	2014-07-29 14:35:11 -0700
committer	Thiago Macieira <thiago.macieira@intel.com>	2014-08-19 03:39:05 +0200
commit	c250a0ec3a196247dde372cde3757913226386b0 (patch)
tree	b94c2b1c39cbd45d8ac351243f761503f34f5758
parent	f7af3e61716691a46c872c33edaf44481428abc0 (diff)