diff options
-rw-r--r-- | src/corelib/text/qbytearray.cpp | 241 | ||||
-rw-r--r-- | src/corelib/text/qstring.cpp | 42 | ||||
-rw-r--r-- | tests/auto/corelib/text/qbytearray/tst_qbytearray.cpp | 44 | ||||
-rw-r--r-- | tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp | 23 |
4 files changed, 166 insertions, 184 deletions
diff --git a/src/corelib/text/qbytearray.cpp b/src/corelib/text/qbytearray.cpp index 16635c4dd9..ae73c5977d 100644 --- a/src/corelib/text/qbytearray.cpp +++ b/src/corelib/text/qbytearray.cpp @@ -1,6 +1,6 @@ /**************************************************************************** ** -** Copyright (C) 2019 The Qt Company Ltd. +** Copyright (C) 2020 The Qt Company Ltd. ** Copyright (C) 2016 Intel Corporation. ** Copyright (C) 2019 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> ** Contact: https://www.qt.io/licensing/ @@ -69,64 +69,16 @@ QT_BEGIN_NAMESPACE -// Latin 1 case system, used by QByteArray::to{Upper,Lower}() and qstr(n)icmp(): -/* -#!/usr/bin/perl -l -use feature "unicode_strings"; -for (0..255) { - $up = uc(chr($_)); - $up = chr($_) if ord($up) > 0x100 || length $up > 1; - printf "0x%02x,", ord($up); - print "" if ($_ & 0xf) == 0xf; -} -*/ -static const uchar latin1_uppercased[256] = { - 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, - 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, - 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, - 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, - 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, - 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, - 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, - 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f, - 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, - 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, - 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, - 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, - 
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, - 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, - 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, - 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xf7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xff -}; +// ASCII case system, used by QByteArray::to{Upper,Lower}() and qstr(n)icmp(): +static constexpr inline uchar asciiUpper(uchar c) +{ + return c >= 'a' && c <= 'z' ? c & ~0x20 : c; +} -/* -#!/usr/bin/perl -l -use feature "unicode_strings"; -for (0..255) { - $up = lc(chr($_)); - $up = chr($_) if ord($up) > 0x100 || length $up > 1; - printf "0x%02x,", ord($up); - print "" if ($_ & 0xf) == 0xf; -} -*/ -static const uchar latin1_lowercased[256] = { - 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, - 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, - 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, - 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, - 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, - 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f, - 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, - 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, - 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, - 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, - 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, - 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, - 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, - 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf, - 
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, - 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff -}; +static constexpr inline uchar asciiLower(uchar c) +{ + return c >= 'A' && c <= 'Z' ? c | 0x20 : c; +} int qFindByteArray( const char *haystack0, int haystackLen, int from, @@ -293,8 +245,8 @@ int qstrcmp(const char *str1, const char *str2) A safe \c stricmp() function. - Compares \a str1 and \a str2 ignoring the case of the - characters. The encoding of the strings is assumed to be Latin-1. + Compares \a str1 and \a str2, ignoring differences in the case of any ASCII + characters. Returns a negative value if \a str1 is less than \a str2, 0 if \a str1 is equal to \a str2 or a positive value if \a str1 is greater @@ -323,11 +275,10 @@ int qstricmp(const char *str1, const char *str2) auto innerCompare = [=, &offset](qptrdiff max, bool unlimited) { max += offset; do { - uchar c = latin1_lowercased[s1[offset]]; - int res = c - latin1_lowercased[s2[offset]]; - if (Q_UNLIKELY(res)) + uchar c = s1[offset]; + if (int res = asciiLower(c) - asciiLower(s2[offset])) return res; - if (Q_UNLIKELY(!c)) + if (!c) return 0; ++offset; } while (unlimited || offset < max); @@ -385,9 +336,8 @@ int qstricmp(const char *str1, const char *str2) A safe \c strnicmp() function. - Compares at most \a len bytes of \a str1 and \a str2 ignoring the - case of the characters. The encoding of the strings is assumed to - be Latin-1. + Compares at most \a len bytes of \a str1 and \a str2, ignoring differences + in the case of any ASCII characters. Returns a negative value if \a str1 is less than \a str2, 0 if \a str1 is equal to \a str2 or a positive value if \a str1 is greater than \a @@ -406,12 +356,11 @@ int qstrnicmp(const char *str1, const char *str2, uint len) { const uchar *s1 = reinterpret_cast<const uchar *>(str1); const uchar *s2 = reinterpret_cast<const uchar *>(str2); - int res; - uchar c; if (!s1 || !s2) return s1 ? 
1 : (s2 ? -1 : 0); - for (; len--; s1++, s2++) { - if ((res = (c = latin1_lowercased[*s1]) - latin1_lowercased[*s2])) + for (; len--; ++s1, ++s2) { + const uchar c = *s1; + if (int res = asciiLower(c) - asciiLower(*s2)) return res; if (!c) // strings are equal break; @@ -437,28 +386,23 @@ int qstrnicmp(const char *str1, qsizetype len1, const char *str2, qsizetype len2 if (!s2) return len1 == 0 ? 0 : 1; - int res; - uchar c; if (len2 == -1) { // null-terminated str2 qsizetype i; for (i = 0; i < len1; ++i) { - c = latin1_lowercased[s2[i]]; + const uchar c = s2[i]; if (!c) return 1; - res = latin1_lowercased[s1[i]] - c; - if (res) + if (int res = asciiLower(s1[i]) - asciiLower(c)) return res; } - c = latin1_lowercased[s2[i]]; - return c ? -1 : 0; + return s2[i] ? -1 : 0; } else { // not null-terminated - for (qsizetype i = 0; i < qMin(len1, len2); ++i) { - c = latin1_lowercased[s2[i]]; - res = latin1_lowercased[s1[i]] - c; - if (res) + const qsizetype len = qMin(len1, len2); + for (qsizetype i = 0; i < len; ++i) { + if (int res = asciiLower(s1[i]) - asciiLower(s2[i])) return res; } if (len1 == len2) @@ -786,14 +730,14 @@ QByteArray qUncompress(const uchar* data, int nbytes) terminator, and uses \l{implicit sharing} (copy-on-write) to reduce memory usage and avoid needless copying of data. - In addition to QByteArray, Qt also provides the QString class to - store string data. For most purposes, QString is the class you - want to use. It stores 16-bit Unicode characters, making it easy - to store non-ASCII/non-Latin-1 characters in your application. - Furthermore, QString is used throughout in the Qt API. The two - main cases where QByteArray is appropriate are when you need to - store raw binary data, and when memory conservation is critical - (e.g., with Qt for Embedded Linux). + In addition to QByteArray, Qt also provides the QString class to store + string data. For most purposes, QString is the class you want to use. 
It + understands its content as Unicode text (encoded using UTF-16) where + QByteArray aims to avoid assumptions about the encoding or semantics of the + bytes it stores (aside from a few legacy cases where it uses ASCII). + Furthermore, QString is used throughout in the Qt API. The two main cases + where QByteArray is appropriate are when you need to store raw binary data, + and when memory conservation is critical (e.g., with Qt for Embedded Linux). One way to initialize a QByteArray is simply to pass a \c{const char *} to its constructor. For example, the following code @@ -868,13 +812,6 @@ QByteArray qUncompress(const uchar* data, int nbytes) memory QByteArray actually allocated. Data appended to an empty array is not copied. - A frequent requirement is to remove whitespace characters from a - byte array ('\\n', '\\t', ' ', etc.). If you want to remove - whitespace from both ends of a QByteArray, use trimmed(). If you - want to remove whitespace from both ends and replace multiple - consecutive whitespaces with a single space character within the - byte array, use simplified(). - If you want to find all occurrences of a particular character or substring in a QByteArray, use indexOf() or lastIndexOf(). The former searches forward starting from a given index position, the @@ -932,29 +869,40 @@ QByteArray qUncompress(const uchar* data, int nbytes) Such considerations, the configuration of such behavior or any mitigation are outside the scope of the QByteArray API. - \section1 Notes on Locale + \section1 C locale and ASCII functions + + QByteArray generally handles data as bytes, without presuming any semantics; + where it does presume semantics, it uses the C locale and ASCII encoding. + Standard Unicode encodings are supported by QString, other encodings may be + supported using QStringEncoder and QStringDecoder to convert to Unicode. For + locale-specific interpretation of text, use QLocale or QString. 
+ + \section2 Spacing Characters + + A frequent requirement is to remove spacing characters from a byte array + ('\\n', '\\t', ' ', etc.). If you want to remove spacing from both ends of a + QByteArray, use trimmed(). If you want to remove spacing from both ends and + replace each run of spacing characters with a single space character within + the byte array, use simplified(). Only ASCII spacing characters are + recognized for these purposes. \section2 Number-String Conversions - Functions that perform conversions between numeric data types and - strings are performed in the C locale, irrespective of the user's - locale settings. Use QString to perform locale-aware conversions - between numbers and strings. + Functions that perform conversions between numeric data types and strings + are performed in the C locale, regardless of the user's locale settings. Use + QLocale to perform locale-aware conversions between numbers and strings. - \section2 8-bit Character Comparisons + \section2 Character Case - In QByteArray, the notion of uppercase and lowercase and of which - character is greater than or less than another character is done - in the Latin-1 locale. This affects functions that support a case - insensitive option or that compare or lowercase or uppercase - their arguments. Case insensitive operations and comparisons will - be accurate if both strings contain only Latin-1 characters. - Functions that this affects include contains(), indexOf(), - lastIndexOf(), operator<(), operator<=(), operator>(), - operator>=(), isLower(), isUpper(), toLower() and toUpper(). + In QByteArray, the notion of uppercase and lowercase and of case-independent + comparison is limited to ASCII. Non-ASCII characters are treated as + caseless, since their case depends on encoding. This affects functions that + support a case insensitive option or that change the case of their + arguments. 
Functions that this affects include contains(), indexOf(), + lastIndexOf(), isLower(), isUpper(), toLower() and toUpper(). - This issue does not apply to \l{QString}s since they represent - characters using Unicode. + This issue does not apply to \l{QString}s since they represent characters + using Unicode. \sa QString, QBitArray */ @@ -2899,22 +2847,16 @@ bool QByteArray::endsWith(const char *str) const } /* - Returns true if \a c is an uppercase Latin1 letter. - \note The multiplication sign 0xD7 and the sz ligature 0xDF are not - treated as uppercase Latin1. + Returns true if \a c is an uppercase ASCII letter. */ -static inline bool isUpperCaseLatin1(char c) +static constexpr inline bool isUpperCaseAscii(char c) { - if (c >= 'A' && c <= 'Z') - return true; - - return (uchar(c) >= 0xC0 && uchar(c) <= 0xDE && uchar(c) != 0xD7); + return c >= 'A' && c <= 'Z'; } /*! - Returns \c true if this byte array contains only uppercase letters, - otherwise returns \c false. The byte array is interpreted as a Latin-1 - encoded string. + Returns \c true if this byte array contains only ASCII uppercase letters, + otherwise returns \c false. \since 5.12 \sa isLower(), toUpper() @@ -2927,7 +2869,7 @@ bool QByteArray::isUpper() const const char *d = data(); for (int i = 0, max = size(); i < max; ++i) { - if (!isUpperCaseLatin1(d[i])) + if (!isUpperCaseAscii(d[i])) return false; } @@ -2935,22 +2877,16 @@ bool QByteArray::isUpper() const } /* - Returns true if \a c is an lowercase Latin1 letter. - \note The division sign 0xF7 is not treated as lowercase Latin1, - but the small y dieresis 0xFF is. + Returns true if \a c is a lowercase ASCII letter. */ -static inline bool isLowerCaseLatin1(char c) +static constexpr inline bool isLowerCaseAscii(char c) { - if (c >= 'a' && c <= 'z') - return true; - - return (uchar(c) >= 0xD0 && uchar(c) != 0xF7); + return c >= 'a' && c <= 'z'; } /*! - Returns \c true if this byte array contains only lowercase letters, - otherwise returns \c false. 
The byte array is interpreted as a Latin-1 - encoded string. + Returns \c true if this byte array contains only lowercase ASCII letters, + otherwise returns \c false. \since 5.12 \sa isUpper(), toLower() @@ -2963,7 +2899,7 @@ bool QByteArray::isLower() const const char *d = data(); for (int i = 0, max = size(); i < max; ++i) { - if (!isLowerCaseLatin1(d[i])) + if (!isLowerCaseAscii(d[i])) return false; } @@ -3076,8 +3012,8 @@ QByteArray QByteArray::mid(int pos, int len) const /*! \fn QByteArray QByteArray::toLower() const - Returns a lowercase copy of the byte array. The bytearray is - interpreted as a Latin-1 encoded string. + Returns a copy of the byte array in which each ASCII uppercase letter + is converted to lowercase. Example: \snippet code/src_corelib_text_qbytearray.cpp 30 @@ -3090,7 +3026,7 @@ QByteArray QByteArray::mid(int pos, int len) const // (even with constant propagation, there's no gain in performance). template <typename T> Q_NEVER_INLINE -static QByteArray toCase_template(T &input, const uchar * table) +static QByteArray toCase_template(T &input, uchar (*lookup)(uchar)) { // find the first bad character in input const char *orig_begin = input.constBegin(); @@ -3098,7 +3034,7 @@ static QByteArray toCase_template(T &input, const uchar * table) const char *e = input.constEnd(); for ( ; firstBad != e ; ++firstBad) { uchar ch = uchar(*firstBad); - uchar converted = table[ch]; + uchar converted = lookup(ch); if (ch != converted) break; } @@ -3111,27 +3047,26 @@ static QByteArray toCase_template(T &input, const uchar * table) char *b = s.begin(); // will detach if necessary char *p = b + (firstBad - orig_begin); e = b + s.size(); - for ( ; p != e; ++p) { - *p = char(uchar(table[uchar(*p)])); - } + for ( ; p != e; ++p) + *p = char(lookup(uchar(*p))); return s; } QByteArray QByteArray::toLower_helper(const QByteArray &a) { - return toCase_template(a, latin1_lowercased); + return toCase_template(a, asciiLower); } QByteArray 
QByteArray::toLower_helper(QByteArray &a) { - return toCase_template(a, latin1_lowercased); + return toCase_template(a, asciiLower); } /*! \fn QByteArray QByteArray::toUpper() const - Returns an uppercase copy of the byte array. The bytearray is - interpreted as a Latin-1 encoded string. + Returns a copy of the byte array in which each ASCII lowercase letter + is converted to uppercase. Example: \snippet code/src_corelib_text_qbytearray.cpp 31 @@ -3141,12 +3076,12 @@ QByteArray QByteArray::toLower_helper(QByteArray &a) QByteArray QByteArray::toUpper_helper(const QByteArray &a) { - return toCase_template(a, latin1_uppercased); + return toCase_template(a, asciiUpper); } QByteArray QByteArray::toUpper_helper(QByteArray &a) { - return toCase_template(a, latin1_uppercased); + return toCase_template(a, asciiUpper); } /*! \fn void QByteArray::clear() @@ -4226,7 +4161,7 @@ QByteArray &QByteArray::setNum(double n, char f, int prec) QLocaleData::DoubleForm form = QLocaleData::DFDecimal; uint flags = QLocaleData::ZeroPadExponent; - char lower = latin1_lowercased[uchar(f)]; + char lower = asciiLower(uchar(f)); if (f != lower) flags |= QLocaleData::CapitalEorX; f = lower; @@ -4248,7 +4183,7 @@ QByteArray &QByteArray::setNum(double n, char f, int prec) break; } - *this = QLocaleData::c()->doubleToString(n, prec, form, -1, flags).toLatin1(); + *this = QLocaleData::c()->doubleToString(n, prec, form, -1, flags).toUtf8(); return *this; } diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index 15f19c03ab..b84262340b 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -1192,6 +1192,46 @@ static int ucstrcmp(const QChar *a, size_t alen, const char *b, size_t blen) return cmp ? 
cmp : lencmp(alen, blen); } +static int latin1nicmp(const char *lhsChar, int lSize, const char *rhsChar, int rSize) +{ + constexpr uchar latin1Lower[256] = { + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, + 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f, + 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, + 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, + 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, + // 0xd7 (multiplication sign) and 0xdf (sz ligature) complicate life + 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf, + 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff + }; + // We're called with QLatin1String's .data() and .size(): + Q_ASSERT(lSize >= 0 && rSize >= 0); + if (!lSize) + return rSize ? 
-1 : 0; + if (!rSize) + return 1; + const int size = std::min(lSize, rSize); + + const uchar *lhs = reinterpret_cast<const uchar *>(lhsChar); + const uchar *rhs = reinterpret_cast<const uchar *>(rhsChar); + Q_ASSERT(lhs && rhs); // since both lSize and rSize are positive + for (int i = 0; i < size; i++) { + Q_ASSERT(lhs[i] && rhs[i]); + if (int res = latin1Lower[lhs[i]] - latin1Lower[rhs[i]]) + return res; + } + return lencmp(lSize, rSize); +} + static int qt_compare_strings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept { if (cs == Qt::CaseSensitive) @@ -1218,7 +1258,7 @@ static int qt_compare_strings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSens if (lhs.isEmpty()) return lencmp(0, rhs.size()); if (cs == Qt::CaseInsensitive) - return qstrnicmp(lhs.data(), lhs.size(), rhs.data(), rhs.size()); + return latin1nicmp(lhs.data(), lhs.size(), rhs.data(), rhs.size()); const auto l = std::min(lhs.size(), rhs.size()); int r = qstrncmp(lhs.data(), rhs.data(), l); return r ? 
r : lencmp(lhs.size(), rhs.size()); diff --git a/tests/auto/corelib/text/qbytearray/tst_qbytearray.cpp b/tests/auto/corelib/text/qbytearray/tst_qbytearray.cpp index e90f4ff14e..16e28cc1d6 100644 --- a/tests/auto/corelib/text/qbytearray/tst_qbytearray.cpp +++ b/tests/auto/corelib/text/qbytearray/tst_qbytearray.cpp @@ -176,9 +176,9 @@ QByteArray verifyZeroTermination(const QByteArray &ba) int baSize = ba.size(); char baTerminator = ba.constData()[baSize]; if ('\0' != baTerminator) - return QString::fromLatin1( - "*** Result ('%1') not null-terminated: 0x%2 ***").arg(QString::fromLatin1(ba)) - .arg(baTerminator, 2, 16, QChar('0')).toLatin1(); + return QString::fromUtf8( + "*** Result ('%1') not null-terminated: 0x%2 ***").arg(QString::fromUtf8(ba)) + .arg(baTerminator, 2, 16, QChar('0')).toUtf8(); // Skip mutating checks on shared strings if (baDataPtr->isShared()) @@ -934,30 +934,30 @@ void tst_QByteArray::qstricmp() QFETCH(QString, str1); QFETCH(QString, str2); - int expected = strcmp(str1.toUpper().toLatin1(), - str2.toUpper().toLatin1()); + int expected = strcmp(str1.toUpper().toUtf8(), + str2.toUpper().toUtf8()); if ( expected != 0 ) { expected = (expected < 0 ? -1 : 1); } - int actual = ::qstricmp(str1.toLatin1(), str2.toLatin1()); + int actual = ::qstricmp(str1.toUtf8(), str2.toUtf8()); if ( actual != 0 ) { actual = (actual < 0 ? -1 : 1); } QCOMPARE(actual, expected); - actual = ::qstricmp("012345679abcd" + str1.toLatin1(), "012345679AbCd" + str2.toLatin1()); + actual = ::qstricmp("012345679abcd" + str1.toUtf8(), "012345679AbCd" + str2.toUtf8()); if ( actual != 0 ) { actual = (actual < 0 ? -1 : 1); } QCOMPARE(actual, expected); - actual = str1.toLatin1().compare(str2.toLatin1(), Qt::CaseInsensitive); + actual = str1.toUtf8().compare(str2.toUtf8(), Qt::CaseInsensitive); if ( actual != 0 ) { actual = (actual < 0 ? 
-1 : 1); } QCOMPARE(actual, expected); - actual = str1.toLatin1().compare(str2.toLatin1().constData(), Qt::CaseInsensitive); + actual = str1.toUtf8().compare(str2.toUtf8().constData(), Qt::CaseInsensitive); if ( actual != 0 ) { actual = (actual < 0 ? -1 : 1); } @@ -1468,7 +1468,7 @@ void tst_QByteArray::toULong_data() QTest::addColumn<bool>("ok"); ulong LongMaxPlusOne = (ulong)LONG_MAX + 1; - QTest::newRow("LONG_MAX+1") << QString::number(LongMaxPlusOne).toLatin1() << 10 << LongMaxPlusOne << true; + QTest::newRow("LONG_MAX+1") << QString::number(LongMaxPlusOne).toUtf8() << 10 << LongMaxPlusOne << true; QTest::newRow("default") << QByteArray() << 10 << 0UL << false; QTest::newRow("empty") << QByteArray("") << 10 << 0UL << false; QTest::newRow("ulong1") << QByteArray("3234567890") << 10 << 3234567890UL << true; @@ -1990,7 +1990,7 @@ void tst_QByteArray::compareCharStar() const bool isEqual = result == 0; const bool isLess = result < 0; const bool isGreater = result > 0; - QByteArray qba = string2.toLatin1(); + QByteArray qba = string2.toUtf8(); const char *str2 = qba.constData(); if (string2.isNull()) str2 = 0; @@ -2297,6 +2297,14 @@ void tst_QByteArray::toUpperLower_data() QTest::addColumn<QByteArray>("upper"); QTest::addColumn<QByteArray>("lower"); + { + QByteArray nonAscii(128, Qt::Uninitialized); + char *data = nonAscii.data(); + for (unsigned char i = 0; i < 128; ++i) + data[i] = i + 128; + QTest::newRow("non-ASCII") << nonAscii << nonAscii << nonAscii; + } + QTest::newRow("empty") << QByteArray() << QByteArray() << QByteArray(); QTest::newRow("literal") << QByteArrayLiteral("Hello World") << QByteArrayLiteral("HELLO WORLD") @@ -2304,9 +2312,6 @@ void tst_QByteArray::toUpperLower_data() QTest::newRow("ascii") << QByteArray("Hello World, this is a STRING") << QByteArray("HELLO WORLD, THIS IS A STRING") << QByteArray("hello world, this is a string"); - QTest::newRow("latin1") << QByteArray("R\311sum\351") - << QByteArray("R\311SUM\311") - << 
QByteArray("r\351sum\351"); QTest::newRow("nul") << QByteArray("a\0B", 3) << QByteArray("A\0B", 3) << QByteArray("a\0b", 3); } @@ -2350,9 +2355,9 @@ void tst_QByteArray::isUpper() QVERIFY(!QByteArray().isUpper()); QVERIFY(!QByteArray("").isUpper()); QVERIFY(QByteArray("TEXT").isUpper()); - QVERIFY(QByteArray("\xD0\xDE").isUpper()); - QVERIFY(!QByteArray("\xD7").isUpper()); // multiplication sign is not upper - QVERIFY(!QByteArray("\xDF").isUpper()); // sz ligature is not upper + QVERIFY(!QByteArray("\xD0\xDE").isUpper()); // non-ASCII is neither upper nor lower + QVERIFY(!QByteArray("\xD7").isUpper()); + QVERIFY(!QByteArray("\xDF").isUpper()); QVERIFY(!QByteArray("text").isUpper()); QVERIFY(!QByteArray("Text").isUpper()); QVERIFY(!QByteArray("tExt").isUpper()); @@ -2373,8 +2378,8 @@ void tst_QByteArray::isLower() QVERIFY(!QByteArray().isLower()); QVERIFY(!QByteArray("").isLower()); QVERIFY(QByteArray("text").isLower()); - QVERIFY(QByteArray("\xE0\xFF").isLower()); - QVERIFY(!QByteArray("\xF7").isLower()); // division sign is not lower + QVERIFY(!QByteArray("\xE0\xFF").isLower()); // non-ASCII is neither upper nor lower + QVERIFY(!QByteArray("\xF7").isLower()); QVERIFY(!QByteArray("Text").isLower()); QVERIFY(!QByteArray("tExt").isLower()); QVERIFY(!QByteArray("teXt").isLower()); @@ -2416,7 +2421,6 @@ void tst_QByteArray::stdString() QVERIFY(l1str.length() < utf8str.length()); } - const char globalChar = '1'; QTEST_MAIN(tst_QByteArray) diff --git a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp index 37cc7db841..f3a7e93be2 100644 --- a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp +++ b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp @@ -972,7 +972,7 @@ void tst_QStringApiSymmetry::compare_data(bool hasConceptOfNullAndEmpty) << 0 << 0; } -#define ROW(lhs, rhs) \ +#define ROW(lhs, rhs, caseless) \ do { \ static const QString 
pinned[] = { \ QString(QLatin1String(lhs)), \ @@ -981,16 +981,19 @@ void tst_QStringApiSymmetry::compare_data(bool hasConceptOfNullAndEmpty) QTest::newRow(qUtf8Printable(QLatin1String("'" lhs "' <> '" rhs "': "))) \ << QStringRef(&pinned[0]) << QLatin1String(lhs) \ << QStringRef(&pinned[1]) << QLatin1String(rhs) \ - << sign(qstrcmp(lhs, rhs)) << sign(qstricmp(lhs, rhs)); \ + << sign(qstrcmp(lhs, rhs)) << caseless; \ } while (false) - ROW("", "0"); - ROW("0", ""); - ROW("0", "1"); - ROW("0", "0"); - ROW("10", "0"); - ROW("01", "1"); - ROW("\xE4", "\xE4"); // ä <> ä - ROW("\xE4", "\xC4"); // ä <> Ä +#define ASCIIROW(lhs, rhs) ROW(lhs, rhs, sign(qstricmp(lhs, rhs))) + ASCIIROW("", "0"); + ASCIIROW("0", ""); + ASCIIROW("0", "1"); + ASCIIROW("0", "0"); + ASCIIROW("10", "0"); + ASCIIROW("01", "1"); + ASCIIROW("e", "e"); + ASCIIROW("e", "E"); + ROW("\xE4", "\xE4", 0); // ä <> ä + ROW("\xE4", "\xC4", 0); // ä <> Ä #undef ROW } |