4 files changed, 290 insertions, 21 deletions
diff --git a/src/corelib/tools/qbytearray.cpp b/src/corelib/tools/qbytearray.cpp
index 088a7c1769..150da82cb8 100644
--- a/src/corelib/tools/qbytearray.cpp
+++ b/src/corelib/tools/qbytearray.cpp
@@ -62,6 +62,64 @@
 
 QT_BEGIN_NAMESPACE
 
+// Latin 1 case system:
+/*
+#!/usr/bin/perl -l
+use feature "unicode_strings";
+for (0..255) {
+    $up = uc(chr($_));
+    $up = chr($_) if ord($up) > 0x100 || length $up > 1;
+    printf "0x%02x,", ord($up);
+    print "" if ($_ & 0xf) == 0xf;
+}
+*/
+static const uchar latin1_uppercased[256] = {
+    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
+    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
+    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
+    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
+    0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
+    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
+    0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
+    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f,
+    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
+    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
+    0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
+    0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
+    0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
+    0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
+    0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
+    0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xf7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xff
+};
+
+/*
+#!/usr/bin/perl -l
+use feature "unicode_strings";
+for (0..255) {
+    $up = lc(chr($_));
+    $up = chr($_) if ord($up) > 0x100 || length $up > 1;
+    printf "0x%02x,", ord($up);
+    print "" if ($_ & 0xf) == 0xf;
+}
+*/
+static const uchar latin1_lowercased[256] = {
+    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
+    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
+    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
+    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
+    0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
+    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f,
+    0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
+    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
+    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
+    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
+    0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
+    0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
+    0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
+    0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf,
+    0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
+    0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+};
 
 int qFindByteArray(
     const char *haystack0, int haystackLen, int from,
@@ -2685,6 +2743,8 @@ QByteArray QByteArray::mid(int pos, int len) const
 }
 
 /*!
+    \fn QByteArray QByteArray::toLower() const
+
     Returns a lowercase copy of the byte array. The bytearray is
     interpreted as a Latin-1 encoded string.
 
@@ -2693,20 +2753,49 @@ QByteArray QByteArray::mid(int pos, int len) const
 
     \sa toUpper(), {8-bit Character Comparisons}
 */
-QByteArray QByteArray::toLower() const
+
+// noinline so that the compiler won't inline the function in each of
+// toLower and toUpper when the only difference is the table being used
+// (even with constant propagation, there's no gain in performance).
+template <typename T>
+#ifdef Q_CC_MSVC
+__declspec(noinline)
+#elif defined(Q_CC_GNU)
+__attribute__((noinline))
+#endif
+static QByteArray toCase_template(T &input, const uchar * table)
 {
-    QByteArray s(*this);
-    uchar *p = reinterpret_cast<uchar *>(s.data());
-    uchar *e = reinterpret_cast<uchar *>(s.end());
-    if (p) {
-        while (p != e) {
-            *p = QChar::toLower((ushort)*p);
-            p++;
-        }
+    // find the first bad character in input
+    const char *orig_begin = input.constBegin();
+    const char *firstBad = orig_begin;
+    const char *e = input.constEnd();
+    for ( ; firstBad != e ; ++firstBad) {
+        uchar ch = uchar(*firstBad);
+        uchar converted = table[ch];
+        if (ch != converted)
+            break;
+    }
+
+    if (firstBad == e)
+        return input;
+
+    // transform the rest
+    QByteArray s = input;
+    char *b = s.begin();            // will detach if necessary
+    char *p = b + (firstBad - orig_begin);
+    e = b + s.size();
+    for ( ; p != e; ++p) {
+        *p = char(uchar(table[uchar(*p)]));
     }
     return s;
 }
 
+
+QByteArray QByteArray::toLower() const
+{
+    return toCase_template(*this, latin1_lowercased);
+}
+
 /*!
     Returns an uppercase copy of the byte array. The bytearray is
     interpreted as a Latin-1 encoded string.
@@ -2716,19 +2805,9 @@ QByteArray QByteArray::toLower() const
 
     \sa toLower(), {8-bit Character Comparisons}
 */
-
 QByteArray QByteArray::toUpper() const
 {
-    QByteArray s(*this);
-    uchar *p = reinterpret_cast<uchar *>(s.data());
-    uchar *e = reinterpret_cast<uchar *>(s.end());
-    if (p) {
-        while (p != e) {
-            *p = QChar::toUpper((ushort)*p);
-            p++;
-        }
-    }
-    return s;
+    return toCase_template(*this, latin1_uppercased);
 }
 
 /*! \fn void QByteArray::clear()
diff --git a/tests/auto/corelib/tools/qbytearray/tst_qbytearray.cpp b/tests/auto/corelib/tools/qbytearray/tst_qbytearray.cpp
index 52e1850c87..8670e4b3ef 100644
--- a/tests/auto/corelib/tools/qbytearray/tst_qbytearray.cpp
+++ b/tests/auto/corelib/tools/qbytearray/tst_qbytearray.cpp
@@ -2022,6 +2022,8 @@ void tst_QByteArray::toUpperLower()
     QFETCH(QByteArray, input);
     QFETCH(QByteArray, upper);
     QFETCH(QByteArray, lower);
+    QCOMPARE(lower.toLower(), lower);
+    QCOMPARE(upper.toUpper(), upper);
     QCOMPARE(input.toUpper(), upper);
     QCOMPARE(input.toLower(), lower);
 }
diff --git a/tests/benchmarks/corelib/tools/qbytearray/main.cpp b/tests/benchmarks/corelib/tools/qbytearray/main.cpp
index 009ffa12e3..830910d664 100644
--- a/tests/benchmarks/corelib/tools/qbytearray/main.cpp
+++ b/tests/benchmarks/corelib/tools/qbytearray/main.cpp
@@ -49,11 +49,25 @@
 class tst_qbytearray : public QObject
 {
     Q_OBJECT
+    QByteArray sourcecode;
 private slots:
+    void initTestCase();
     void append();
     void append_data();
+
+    void latin1Uppercasing_qt54();
+    void latin1Uppercasing_xlate();
+    void latin1Uppercasing_xlate_checked();
+    void latin1Uppercasing_category();
+    void latin1Uppercasing_bitcheck();
 };
 
+void tst_qbytearray::initTestCase()
+{
+    QFile self(QFINDTESTDATA("main.cpp"));
+    QVERIFY(self.open(QIODevice::ReadOnly));
+    sourcecode = self.readAll();
+}
 
 void tst_qbytearray::append_data()
 {
@@ -81,6 +95,181 @@ void tst_qbytearray::append()
     }
 }
 
+void tst_qbytearray::latin1Uppercasing_qt54()
+{
+    QByteArray s = sourcecode;
+    s.detach();
+
+    // the following was copied from qbytearray.cpp (except for the QBENCHMARK macro):
+    uchar *p_orig = reinterpret_cast<uchar *>(s.data());
+    uchar *e = reinterpret_cast<uchar *>(s.end());
+
+    QBENCHMARK {
+        uchar *p = p_orig;
+        if (p) {
+            while (p != e) {
+                *p = QChar::toLower((ushort)*p);
+                p++;
+            }
+        }
+    }
+}
+
+
+/*
+#!/usr/bin/perl -l
+use feature "unicode_strings"
+for (0..255) {
+    $up = uc(chr($_));
+    $up = chr($_) if ord($up) > 0x100 || length $up > 1;
+    printf "0x%02x,", ord($up);
+    print "" if ($_ & 0xf) == 0xf;
+}
+*/
+static const uchar uppercased[256] = {
+    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
+    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
+    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
+    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
+    0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
+    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
+    0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
+    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f,
+    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
+    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
+    0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
+    0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
+    0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
+    0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
+    0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
+    0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xf7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xff
+};
+void tst_qbytearray::latin1Uppercasing_xlate()
+{
+    QByteArray output = sourcecode;
+    output.detach();
+    char *dst_orig = output.data();
+    const char *src_orig = sourcecode.constBegin();
+    const char *end = sourcecode.constEnd();
+    QBENCHMARK {
+        char *dst = dst_orig;
+        for (const char *src = src_orig; src != end; ++src, ++dst)
+            *dst = uppercased[uchar(*src)];
+    }
+}
+
+void tst_qbytearray::latin1Uppercasing_xlate_checked()
+{
+    QByteArray output = sourcecode;
+    output.detach();
+    char *dst_orig = output.data();
+    const char *src_orig = sourcecode.constBegin();
+    const char *end = sourcecode.constEnd();
+    QBENCHMARK {
+        char *dst = dst_orig;
+        for (const char *src = src_orig; src != end; ++src, ++dst) {
+            uchar ch = uchar(*src);
+            uchar converted = uppercased[ch];
+            if (ch != converted)
+                *dst = converted;
+        }
+    }
+}
+
+/*
+#!/bin/perl -l
+use feature "unicode_strings";
+sub categorize($) {
+    # 'ß' and 'ÿ' are lowercase, but we cannot uppercase them
+    return 0 if $_[0] == 0xDF || $_[0] == 0xFF;
+    $ch = chr($_[0]);
+    return 2 if uc($ch) ne $ch;
+    return 1 if lc($ch) ne $ch;
+    return 0;
+}
+for (0..255) {
+    printf "%d,", categorize($_);
+    print "" if ($_ & 0xf) == 0xf;
+}
+*/
+static const char categories[256] = {
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
+    0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,0
+};
+
+void tst_qbytearray::latin1Uppercasing_category()
+{
+    QByteArray output = sourcecode;
+    output.detach();
+    char *dst_orig = output.data();
+    const char *src_orig = sourcecode.constBegin();
+    const char *end = sourcecode.constEnd();
+    QBENCHMARK {
+        char *dst = dst_orig;
+        for (const char *src = src_orig; src != end; ++src, ++dst)
+            *dst = categories[uchar(*src)] == 1 ? *src & ~0x20 : *src;
+    }
+}
+
+/*
+#!/bin/perl -l
+use feature "unicode_strings";
+sub categorize($) {
+    # 'ß' and 'ÿ' are lowercase, but we cannot uppercase them
+    return 0 if $_[0] == 0xDF || $_[0] == 0xFF;
+    $ch = chr($_[0]);
+    return 2 if uc($ch) ne $ch;
+    return 1 if lc($ch) ne $ch;
+    return 0;
+}
+for $row (0..7) {
+    $val = 0;
+    for $col (0..31) {
+        $val |= (1<<$col)
+            if categorize($row * 31 + $col) == 2;
+    }
+    printf "0x%08x,", $val;
+}
+*/
+
+static const quint32 shouldUppercase[8] = {
+    0x00000000,0x00000000,0x00000000,0x3ffffff0,0x00000000,0x04000000,0x00000000,0xbfffff80
+};
+
+static bool bittest(const quint32 *data, uchar bit)
+{
+    static const unsigned bitsperelem = sizeof(*data) * CHAR_BIT;
+    return data[bit / bitsperelem] & (1 << (bit & (bitsperelem - 1)));
+}
+
+void tst_qbytearray::latin1Uppercasing_bitcheck()
+{
+    QByteArray output = sourcecode;
+    output.detach();
+    char *dst_orig = output.data();
+    const char *src_orig = sourcecode.constBegin();
+    const char *end = sourcecode.constEnd();
+    QBENCHMARK {
+        char *dst = dst_orig;
+        for (const char *src = src_orig; src != end; ++src, ++dst)
+            *dst = bittest(shouldUppercase, *src) ? uchar(*src) & ~0x20 : uchar(*src);
+    }
+}
+
 
 QTEST_MAIN(tst_qbytearray)
 
diff --git a/tests/benchmarks/corelib/tools/qbytearray/qbytearray.pro b/tests/benchmarks/corelib/tools/qbytearray/qbytearray.pro
index 14bf1d8272..0d5e7646ad 100644
--- a/tests/benchmarks/corelib/tools/qbytearray/qbytearray.pro
+++ b/tests/benchmarks/corelib/tools/qbytearray/qbytearray.pro
@@ -2,7 +2,6 @@ TEMPLATE = app
 TARGET = tst_bench_qbytearray
 
 QT = core testlib
-CONFIG += release
 
 SOURCES += main.cpp
 DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0