diff options
Diffstat (limited to 'src/corelib')
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 7 | ||||
-rw-r--r-- | src/corelib/io/qurlrecode.cpp | 15 | ||||
-rw-r--r-- | src/corelib/json/qjsonparser.cpp | 5 | ||||
-rw-r--r-- | src/corelib/json/qjsonwriter.cpp | 3 | ||||
-rw-r--r-- | src/corelib/tools/qchar.cpp | 100 | ||||
-rw-r--r-- | src/corelib/tools/qchar.h | 19 | ||||
-rw-r--r-- | src/corelib/tools/qunicodetables_p.h | 14 | ||||
-rw-r--r-- | src/corelib/xml/qxmlstream.cpp | 4 |
8 files changed, 96 insertions, 71 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index c3d9dbbd31..0fb4bb6761 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -43,7 +43,6 @@ #include "qlist.h" #include "qendian.h" #include "qchar.h" -#include <private/qunicodetables_p.h> QT_BEGIN_NAMESPACE @@ -108,7 +107,7 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve *cursor++ = 0xc0 | ((uchar) (u >> 6)); } else { // is it one of the Unicode non-characters? - if (QUnicodeTables::isNonCharacter(u)) { + if (QChar::isNonCharacter(u)) { *cursor++ = replacement; ++ch; ++invalid; @@ -184,12 +183,12 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte bool nonCharacter; if (!headerdone && uc == 0xfeff) { // don't do anything, just skip the BOM - } else if (!(nonCharacter = QUnicodeTables::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) { + } else if (!(nonCharacter = QChar::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) { // surrogate pair Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length()); *qch++ = QChar::highSurrogate(uc); *qch++ = QChar::lowSurrogate(uc); - } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || nonCharacter || uc >= 0x110000) { + } else if ((uc < min_uc) || QChar::isSurrogate(uc) || nonCharacter || uc > QChar::LastValidCodePoint) { // error: overlong sequence, UTF16 surrogate or non-character *qch++ = replacement; ++invalid; diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp index de9c5aa7db..3d985dbdc0 100644 --- a/src/corelib/io/qurlrecode.cpp +++ b/src/corelib/io/qurlrecode.cpp @@ -285,19 +285,6 @@ static void ensureDetached(QString &result, ushort *&output, const ushort *begin } } -static inline bool isUnicodeNonCharacter(uint ucs4) -{ - // Unicode has a couple of "non-characters" that one can use internally, - // but are not allowed to be used for text interchange. - // - // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF, - // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and - // U+FDEF (inclusive) - - return (ucs4 & 0xfffe) == 0xfffe - || (ucs4 - 0xfdd0U) < 16; -} - // returns true if we performed an UTF-8 decoding static bool encodedUtf8ToUtf16(QString &result, ushort *&output, const ushort *begin, const ushort *&input, const ushort *end, ushort decoded) @@ -370,7 +357,7 @@ static bool encodedUtf8ToUtf16(QString &result, ushort *&output, const ushort *b // we've decoded something; safety-check it if (uc < min_uc) return false; - if (isUnicodeNonCharacter(uc) || (uc >= 0xD800 && uc <= 0xDFFF) || uc >= 0x110000) + if (QChar::isSurrogate(uc) || QChar::isNonCharacter(uc) || uc > QChar::LastValidCodePoint) return false; if (!QChar::requiresSurrogates(uc)) { diff --git a/src/corelib/json/qjsonparser.cpp b/src/corelib/json/qjsonparser.cpp index 5fecb8d4e7..6706f12b24 100644 --- a/src/corelib/json/qjsonparser.cpp +++ b/src/corelib/json/qjsonparser.cpp @@ -45,7 +45,6 @@ #include <qdebug.h> #include "qjsonparser_p.h" #include "qjson_p.h" -#include <private/qunicodetables_p.h> //#define PARSER_DEBUG #ifdef PARSER_DEBUG @@ -769,8 +768,8 @@ static inline bool scanUtf8Char(const char *&json, const char *end, uint *result uc = (uc << 6) | (ch & 0x3f); } - if (uc < min_uc || QUnicodeTables::isNonCharacter(uc) || - (uc >= 0xd800 && uc <= 0xdfff) || uc >= 0x110000) { + if (uc < min_uc || QChar::isNonCharacter(uc) || + QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) { return false; } diff --git a/src/corelib/json/qjsonwriter.cpp b/src/corelib/json/qjsonwriter.cpp index b086cbdea9..c591657f4c 100644 --- a/src/corelib/json/qjsonwriter.cpp +++ b/src/corelib/json/qjsonwriter.cpp @@ -41,7 +41,6 @@ #include "qjsonwriter_p.h" #include "qjson_p.h" -#include <private/qunicodetables_p.h> QT_BEGIN_NAMESPACE @@ -140,7 +139,7 @@ static QByteArray escapedString(const QString &s) *cursor++ = 0xc0 | ((uchar) (u >> 6)); } else { // is it one of the Unicode non-characters? - if (QUnicodeTables::isNonCharacter(u)) { + if (QChar::isNonCharacter(u)) { *cursor++ = replacement; ++ch; continue; diff --git a/src/corelib/tools/qchar.cpp b/src/corelib/tools/qchar.cpp index 138c3a69ee..9c5a515dd7 100644 --- a/src/corelib/tools/qchar.cpp +++ b/src/corelib/tools/qchar.cpp @@ -377,6 +377,7 @@ QT_BEGIN_NAMESPACE \value ByteOrderSwapped \value ParagraphSeparator \value LineSeparator + \value LastValidCodePoint */ /*! @@ -499,7 +500,7 @@ QT_BEGIN_NAMESPACE */ bool QChar::isPrint(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return false; const int test = FLAG(Other_Control) | FLAG(Other_Format) | @@ -532,7 +533,7 @@ bool QChar::isPrint(uint ucs4) */ bool QT_FASTCALL QChar::isSpace_helper(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return false; const int test = FLAG(Separator_Space) | FLAG(Separator_Line) | @@ -558,7 +559,7 @@ bool QT_FASTCALL QChar::isSpace_helper(uint ucs4) */ bool QChar::isMark(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return false; const int test = FLAG(Mark_NonSpacing) | FLAG(Mark_SpacingCombining) | @@ -582,7 +583,7 @@ bool QChar::isMark(uint ucs4) */ bool QChar::isPunct(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return false; const int test = FLAG(Punctuation_Connector) | FLAG(Punctuation_Dash) | @@ -610,7 +611,7 @@ bool QChar::isPunct(uint ucs4) */ bool QChar::isSymbol(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return false; const int test = FLAG(Symbol_Math) | FLAG(Symbol_Currency) | @@ -640,7 +641,7 @@ bool QChar::isSymbol(uint ucs4) */ bool QT_FASTCALL QChar::isLetter_helper(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return false; const int test = FLAG(Letter_Uppercase) | FLAG(Letter_Lowercase) | @@ -675,7 +676,7 @@ bool QT_FASTCALL QChar::isLetter_helper(uint ucs4) */ bool QT_FASTCALL QChar::isNumber_helper(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return false; const int test = FLAG(Number_DecimalDigit) | FLAG(Number_Letter) | @@ -704,7 +705,7 @@ bool QT_FASTCALL QChar::isNumber_helper(uint ucs4) */ bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return false; const int test = FLAG(Letter_Uppercase) | FLAG(Letter_Lowercase) | @@ -738,17 +739,54 @@ bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4) */ /*! + \fn bool QChar::isNonCharacter() const + \since 5.0 + + Returns true if the QChar is a non-character; false otherwise. + + Unicode has a certain number of code points that are classified + as "non-characters:" that is, they can be used for internal purposes + in applications but cannot be used for text interchange. + Those are the last two entries each Unicode Plane ([0xfffe..0xffff], + [0x1fffe..0x1ffff], etc.) as well as the entries in range [0xfdd0..0xfdef]. +*/ + +/*! \fn bool QChar::isHighSurrogate() const Returns true if the QChar is the high part of a UTF16 surrogate - (i.e. if it's code point in range [0xd800..0xdbff]). + (i.e. if its code point is in range [0xd800..0xdbff]); false otherwise. */ /*! \fn bool QChar::isLowSurrogate() const Returns true if the QChar is the low part of a UTF16 surrogate - (i.e. if it's code point in range [0xdc00..0xdfff]). + (i.e. if its code point is in range [0xdc00..0xdfff]); false otherwise. +*/ + +/*! + \fn bool QChar::isSurrogate() const + \since 5.0 + + Returns true if the QChar contains a code point that is in either + the high or the low part of the UTF-16 surrogate range + (i.e. if its code point is in range [0xd800..0xdfff]); false otherwise. +*/ + +/*! + \fn static bool isNonCharacter(uint ucs4) + \overload + \since 5.0 + + Returns true if the UCS-4-encoded character specified by \a ucs4 + is a non-character; false otherwise. + + Unicode has a certain number of code points that are classified + as "non-characters:" that is, they can be used for internal purposes + in applications but cannot be used for text interchange. + Those are the last two entries each Unicode Plane ([0xfffe..0xffff], + [0x1fffe..0x1ffff], etc.) as well as the entries in range [0xfdd0..0xfdef]. */ /*! @@ -757,7 +795,7 @@ bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4) Returns true if the UCS-4-encoded character specified by \a ucs4 is the high part of a UTF16 surrogate - (i.e. if it's code point in range [0xd800..0xdbff]). + (i.e. if its code point is in range [0xd800..0xdbff]); false otherwise. */ /*! @@ -766,7 +804,18 @@ bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4) Returns true if the UCS-4-encoded character specified by \a ucs4 is the low part of a UTF16 surrogate - (i.e. if it's code point in range [0xdc00..0xdfff]). + (i.e. if its code point is in range [0xdc00..0xdfff]); false otherwise. +*/ + +/*! + \fn static bool QChar::isSurrogate(uint ucs4) + \overload + \since 5.0 + + Returns true if the UCS-4-encoded character specified by \a ucs4 + contains a code point that is in either the high or the low part of the + UTF-16 surrogate range (i.e. if its code point is in range [0xd800..0xdfff]); + false otherwise. */ /*! @@ -774,7 +823,8 @@ bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4) Returns true if the UCS-4-encoded character specified by \a ucs4 can be split into the high and low parts of a UTF16 surrogate - (i.e. if it's code point is greater than or equals to 0x10000). + (i.e. if its code point is greater than or equals to 0x10000); + false otherwise. */ /*! @@ -818,7 +868,7 @@ bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4) */ int QChar::digitValue(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return -1; return qGetProp(ucs4)->digitValue; } @@ -835,7 +885,7 @@ int QChar::digitValue(uint ucs4) */ QChar::Category QChar::category(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return QChar::Other_NotAssigned; return (QChar::Category) qGetProp(ucs4)->category; } @@ -852,7 +902,7 @@ QChar::Category QChar::category(uint ucs4) */ QChar::Direction QChar::direction(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return QChar::DirL; return (QChar::Direction) qGetProp(ucs4)->direction; } @@ -871,7 +921,7 @@ QChar::Direction QChar::direction(uint ucs4) */ QChar::Joining QChar::joining(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return QChar::OtherJoining; return (QChar::Joining) qGetProp(ucs4)->joining; } @@ -900,7 +950,7 @@ QChar::Joining QChar::joining(uint ucs4) */ bool QChar::hasMirrored(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return false; return qGetProp(ucs4)->mirrorDiff != 0; } @@ -950,7 +1000,7 @@ bool QChar::hasMirrored(uint ucs4) */ uint QChar::mirroredChar(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return ucs4; return ucs4 + qGetProp(ucs4)->mirrorDiff; } @@ -1060,7 +1110,7 @@ QChar::Decomposition QChar::decompositionTag(uint ucs4) */ unsigned char QChar::combiningClass(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return 0; return (unsigned char) qGetProp(ucs4)->combiningClass; } @@ -1078,7 +1128,7 @@ unsigned char QChar::combiningClass(uint ucs4) */ QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return QChar::Unicode_Unassigned; return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion; } @@ -1155,7 +1205,7 @@ static inline T toCaseFolded_helper(T uc) */ uint QChar::toLower(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return ucs4; return toLowerCase_helper<uint>(ucs4); } @@ -1175,7 +1225,7 @@ uint QChar::toLower(uint ucs4) */ uint QChar::toUpper(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return ucs4; return toUpperCase_helper<uint>(ucs4); } @@ -1195,7 +1245,7 @@ uint QChar::toUpper(uint ucs4) */ uint QChar::toTitleCase(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return ucs4; return toTitleCase_helper<uint>(ucs4); } @@ -1236,7 +1286,7 @@ static inline ushort foldCase(ushort ch) */ uint QChar::toCaseFolded(uint ucs4) { - if (ucs4 > UNICODE_LAST_CODEPOINT) + if (ucs4 > LastValidCodePoint) return ucs4; return toCaseFolded_helper<uint>(ucs4); } diff --git a/src/corelib/tools/qchar.h b/src/corelib/tools/qchar.h index 07333c9535..6c423859ec 100644 --- a/src/corelib/tools/qchar.h +++ b/src/corelib/tools/qchar.h @@ -73,7 +73,8 @@ public: ByteOrderMark = 0xfeff, ByteOrderSwapped = 0xfffe, ParagraphSeparator = 0x2029, - LineSeparator = 0x2028 + LineSeparator = 0x2028, + LastValidCodePoint = 0x10ffff }; Q_DECL_CONSTEXPR QChar() : ucs(0) {} @@ -245,24 +246,28 @@ public: inline bool isUpper() const { return QChar::isUpper(ucs); } inline bool isTitleCase() const { return QChar::isTitleCase(ucs); } - inline bool isHighSurrogate() const { - return ((ucs & 0xfc00) == 0xd800); - } - inline bool isLowSurrogate() const { - return ((ucs & 0xfc00) == 0xdc00); - } + inline bool isNonCharacter() const { return QChar::isNonCharacter(ucs); } + inline bool isHighSurrogate() const { return QChar::isHighSurrogate(ucs); } + inline bool isLowSurrogate() const { return QChar::isLowSurrogate(ucs); } + inline bool isSurrogate() const { return QChar::isSurrogate(ucs); } inline uchar cell() const { return uchar(ucs & 0xff); } inline uchar row() const { return uchar((ucs>>8)&0xff); } inline void setCell(uchar cell); inline void setRow(uchar row); + static inline bool isNonCharacter(uint ucs4) { + return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe); + } static inline bool isHighSurrogate(uint ucs4) { return ((ucs4 & 0xfffffc00) == 0xd800); } static inline bool isLowSurrogate(uint ucs4) { return ((ucs4 & 0xfffffc00) == 0xdc00); } + static inline bool isSurrogate(uint ucs4) { + return (ucs4 - 0xd800u < 2048u); + } static inline bool requiresSurrogates(uint ucs4) { return (ucs4 >= 0x10000); } diff --git a/src/corelib/tools/qunicodetables_p.h b/src/corelib/tools/qunicodetables_p.h index 15d5415b0b..293f03b94f 100644 --- a/src/corelib/tools/qunicodetables_p.h +++ b/src/corelib/tools/qunicodetables_p.h @@ -61,8 +61,6 @@ QT_BEGIN_NAMESPACE #define UNICODE_DATA_VERSION QChar::Unicode_5_0 -#define UNICODE_LAST_CODEPOINT 0x10ffff - namespace QUnicodeTables { struct Properties { @@ -237,18 +235,6 @@ namespace QUnicodeTables { inline int script(QChar ch) { return script(ch.unicode()); } - - inline bool isNonCharacter(uint ucs4) - { - // Noncharacter_Code_Point: - // Unicode has a couple of "non-characters" that one can use internally, - // but are not allowed to be used for text interchange. - // Those are the last two entries each Unicode Plane (U+FFFE..U+FFFF, - // U+1FFFE..U+1FFFF, etc.) as well as the entries in range U+FDD0..U+FDEF - - return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe); - } - } // namespace QUnicodeTables QT_END_NAMESPACE diff --git a/src/corelib/xml/qxmlstream.cpp b/src/corelib/xml/qxmlstream.cpp index 769cdc9495..b37675fea9 100644 --- a/src/corelib/xml/qxmlstream.cpp +++ b/src/corelib/xml/qxmlstream.cpp @@ -986,7 +986,7 @@ bool QXmlStreamReaderPrivate::scanUntil(const char *str, short tokenToInject) textBuffer += QChar(c); continue; default: - if(c < 0x20 || (c > 0xFFFD && c < 0x10000) || c > 0x10FFFF ) { + if (c < 0x20 || (c > 0xFFFD && c < 0x10000) || c > QChar::LastValidCodePoint ) { raiseWellFormedError(QXmlStream::tr("Invalid XML character.")); lineNumber = oldLineNumber; return false; @@ -1718,7 +1718,7 @@ uint QXmlStreamReaderPrivate::resolveCharRef(int symbolIndex) s = symString(symbolIndex).toString().toUInt(&ok, 10); ok &= (s == 0x9 || s == 0xa || s == 0xd || (s >= 0x20 && s <= 0xd7ff) - || (s >= 0xe000 && s <= 0xfffd) || (s >= 0x10000 && s <= 0x10ffff)); + || (s >= 0xe000 && s <= 0xfffd) || (s >= 0x10000 && s <= QChar::LastValidCodePoint)); return ok ? s : 0; } |