diff options
Diffstat (limited to 'src/corelib/tools/qbytearray.cpp')
-rw-r--r-- | src/corelib/tools/qbytearray.cpp | 205 |
1 files changed, 139 insertions, 66 deletions
diff --git a/src/corelib/tools/qbytearray.cpp b/src/corelib/tools/qbytearray.cpp index 6ac442d27b..26bf7d047d 100644 --- a/src/corelib/tools/qbytearray.cpp +++ b/src/corelib/tools/qbytearray.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies). +** Copyright (C) 2014 Intel Corporation. ** Contact: http://www.qt-project.org/legal ** ** This file is part of the QtCore module of the Qt Toolkit. @@ -38,6 +39,7 @@ #include "qlist.h" #include "qlocale.h" #include "qlocale_p.h" +#include "qstringalgorithms_p.h" #include "qscopedpointer.h" #include <qdatastream.h> #include <qmath.h> @@ -54,6 +56,64 @@ QT_BEGIN_NAMESPACE +// Latin 1 case system, used by QByteArray::to{Upper,Lower}() and qstr(n)icmp(): +/* +#!/usr/bin/perl -l +use feature "unicode_strings"; +for (0..255) { + $up = uc(chr($_)); + $up = chr($_) if ord($up) > 0x100 || length $up > 1; + printf "0x%02x,", ord($up); + print "" if ($_ & 0xf) == 0xf; +} +*/ +static const uchar latin1_uppercased[256] = { + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, + 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, + 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, + 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, + 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f, + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, + 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, + 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, + 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, + 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, + 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, + 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xf7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xff +}; + +/* +#!/usr/bin/perl -l +use feature "unicode_strings"; +for (0..255) { + $up = lc(chr($_)); + $up = chr($_) if ord($up) > 0x100 || length $up > 1; + printf "0x%02x,", ord($up); + print "" if ($_ & 0xf) == 0xf; +} +*/ +static const uchar latin1_lowercased[256] = { + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, + 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f, + 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, + 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, + 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, + 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf, + 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff +}; int qFindByteArray( const char *haystack0, int haystackLen, int from, @@ -63,7 +123,7 @@ int qFindByteArray( int qAllocMore(int alloc, int extra) Q_DECL_NOTHROW { Q_ASSERT(alloc >= 0 && extra >= 0); - Q_ASSERT_X(alloc < (1 << 30) - extra, "qAllocMore", "Requested size is too large!"); + Q_ASSERT_X(alloc <= MaxAllocSize - extra, "qAllocMore", "Requested size is too large!"); unsigned nalloc = qNextPowerOfTwo(alloc + extra); @@ -248,7 +308,7 @@ int qstricmp(const char *str1, const char *str2) uchar c; if (!s1 || !s2) return s1 ? 1 : (s2 ? -1 : 0); - for (; !(res = (c = QChar::toLower((ushort)*s1)) - QChar::toLower((ushort)*s2)); s1++, s2++) + for (; !(res = (c = latin1_lowercased[*s1]) - latin1_lowercased[*s2]); s1++, s2++) if (!c) // strings are equal break; return res; @@ -283,7 +343,7 @@ int qstrnicmp(const char *str1, const char *str2, uint len) if (!s1 || !s2) return s1 ? 1 : (s2 ? -1 : 0); for (; len--; s1++, s2++) { - if ((res = (c = QChar::toLower((ushort)*s1)) - QChar::toLower((ushort)*s2))) + if ((res = (c = latin1_lowercased[*s1]) - latin1_lowercased[*s2])) return res; if (!c) // strings are equal break; @@ -1485,8 +1545,11 @@ void QByteArray::reallocData(uint alloc, Data::AllocationOptions options) Data::deallocate(d); d = x; } else { - if (options & Data::Grow) + if (options & Data::Grow) { + if (alloc > uint(MaxAllocSize) - uint(sizeof(Data))) + qBadAlloc(); alloc = qAllocMore(alloc, sizeof(Data)); + } Data *x = static_cast<Data *>(::realloc(d, sizeof(Data) + alloc)); Q_CHECK_PTR(x); x->alloc = alloc; @@ -2677,6 +2740,8 @@ QByteArray QByteArray::mid(int pos, int len) const } /*! + \fn QByteArray QByteArray::toLower() const + Returns a lowercase copy of the byte array. The bytearray is interpreted as a Latin-1 encoded string. @@ -2685,21 +2750,56 @@ QByteArray QByteArray::mid(int pos, int len) const \sa toUpper(), {8-bit Character Comparisons} */ -QByteArray QByteArray::toLower() const + +// noinline so that the compiler won't inline the function in each of +// toLower and toUpper when the only difference is the table being used +// (even with constant propagation, there's no gain in performance). +template <typename T> +#ifdef Q_CC_MSVC +__declspec(noinline) +#elif defined(Q_CC_GNU) +__attribute__((noinline)) +#endif +static QByteArray toCase_template(T &input, const uchar * table) { - QByteArray s(*this); - uchar *p = reinterpret_cast<uchar *>(s.data()); - uchar *e = reinterpret_cast<uchar *>(s.end()); - if (p) { - while (p != e) { - *p = QChar::toLower((ushort)*p); - p++; - } + // find the first bad character in input + const char *orig_begin = input.constBegin(); + const char *firstBad = orig_begin; + const char *e = input.constEnd(); + for ( ; firstBad != e ; ++firstBad) { + uchar ch = uchar(*firstBad); + uchar converted = table[ch]; + if (ch != converted) + break; + } + + if (firstBad == e) + return qMove(input); + + // transform the rest + QByteArray s = qMove(input); // will copy if T is const QByteArray + char *b = s.begin(); // will detach if necessary + char *p = b + (firstBad - orig_begin); + e = b + s.size(); + for ( ; p != e; ++p) { + *p = char(uchar(table[uchar(*p)])); } return s; } +QByteArray QByteArray::toLower_helper(const QByteArray &a) +{ + return toCase_template(a, latin1_lowercased); +} + +QByteArray QByteArray::toLower_helper(QByteArray &a) +{ + return toCase_template(a, latin1_lowercased); +} + /*! + \fn QByteArray QByteArray::toUpper() const + Returns an uppercase copy of the byte array. The bytearray is interpreted as a Latin-1 encoded string. @@ -2709,18 +2809,14 @@ QByteArray QByteArray::toLower() const \sa toLower(), {8-bit Character Comparisons} */ -QByteArray QByteArray::toUpper() const +QByteArray QByteArray::toUpper_helper(const QByteArray &a) { - QByteArray s(*this); - uchar *p = reinterpret_cast<uchar *>(s.data()); - uchar *e = reinterpret_cast<uchar *>(s.end()); - if (p) { - while (p != e) { - *p = QChar::toUpper((ushort)*p); - p++; - } - } - return s; + return toCase_template(a, latin1_uppercased); +} + +QByteArray QByteArray::toUpper_helper(QByteArray &a) +{ + return toCase_template(a, latin1_uppercased); } /*! \fn void QByteArray::clear() @@ -3100,6 +3196,8 @@ QDataStream &operator>>(QDataStream &in, QByteArray &ba) */ /*! + \fn QByteArray QByteArray::simplified() const + Returns a byte array that has whitespace removed from the start and the end, and which has each sequence of internal whitespace replaced with a single space. @@ -3114,32 +3212,19 @@ QDataStream &operator>>(QDataStream &in, QByteArray &ba) \sa trimmed() */ -QByteArray QByteArray::simplified() const +QByteArray QByteArray::simplified_helper(const QByteArray &a) { - if (d->size == 0) - return *this; - QByteArray result(d->size, Qt::Uninitialized); - const char *from = d->data(); - const char *fromend = from + d->size; - int outc=0; - char *to = result.d->data(); - for (;;) { - while (from!=fromend && ascii_isspace(uchar(*from))) - from++; - while (from!=fromend && !ascii_isspace(uchar(*from))) - to[outc++] = *from++; - if (from!=fromend) - to[outc++] = ' '; - else - break; - } - if (outc > 0 && to[outc-1] == ' ') - outc--; - result.resize(outc); - return result; + return QStringAlgorithms<const QByteArray>::simplified_helper(a); +} + +QByteArray QByteArray::simplified_helper(QByteArray &a) +{ + return QStringAlgorithms<QByteArray>::simplified_helper(a); } /*! + \fn QByteArray QByteArray::trimmed() const + Returns a byte array that has whitespace removed from the start and the end. @@ -3154,29 +3239,17 @@ QByteArray QByteArray::simplified() const \sa simplified() */ -QByteArray QByteArray::trimmed() const +QByteArray QByteArray::trimmed_helper(const QByteArray &a) { - if (d->size == 0) - return *this; - const char *s = d->data(); - if (!ascii_isspace(uchar(*s)) && !ascii_isspace(uchar(s[d->size-1]))) - return *this; - int start = 0; - int end = d->size - 1; - while (start<=end && ascii_isspace(uchar(s[start]))) // skip white space from start - start++; - if (start <= end) { // only white space - while (end && ascii_isspace(uchar(s[end]))) // skip white space from end - end--; - } - int l = end - start + 1; - if (l <= 0) { - QByteArrayDataPtr empty = { Data::allocate(0) }; - return QByteArray(empty); - } - return QByteArray(s+start, l); + return QStringAlgorithms<const QByteArray>::trimmed_helper(a); +} + +QByteArray QByteArray::trimmed_helper(QByteArray &a) +{ + return QStringAlgorithms<QByteArray>::trimmed_helper(a); } + /*! Returns a byte array of size \a width that contains this byte array padded by the \a fill character. |