diff options
author | Kurt Pattyn <pattyn.kurt@gmail.com> | 2013-10-06 11:40:47 +0200 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2013-10-17 09:50:58 +0200 |
commit | add2bf739ae96603cb919b908cbb53c00d0628cc (patch) | |
tree | 9702a95d145fc9f429aa6f2ec104cfab75cae753 /src/corelib | |
parent | e8853506bf82e569009e68a23437d6a134176f63 (diff) |
Allow non-character codes in utf8 strings
Changed the processing of non-character code handling in the UTF8 codec.
Non-character codes are now accepted in QStrings, QUrls and QJson strings.
Unit tests were adapted accordingly.
For more info about non-character codes,
see: http://www.unicode.org/versions/corrigendum9.html
[ChangeLog][QtCore][QUtf8]
UTF-8 now accepts non-character unicode points; these are not replaced
by the replacement character anymore
[ChangeLog][QtCore][QUrl]
QUrl now fully accepts non-character unicode points; they are encoded as
percent characters; they can also be pretty decoded
[ChangeLog][QtCore][QJson]
The Writer and the Parser now fully accept non-character unicode points.
Change-Id: I77cf4f0e6210741eac8082912a0b6118eced4f77
Task-number: QTBUG-33229
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib')
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 13 | ||||
-rw-r--r-- | src/corelib/io/qurlrecode.cpp | 2 | ||||
-rw-r--r-- | src/corelib/json/qjsonparser.cpp | 2 | ||||
-rw-r--r-- | src/corelib/json/qjsonwriter.cpp | 7 |
4 files changed, 4 insertions, 20 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index aeedcf1aa1..e425f8634c 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -106,14 +106,6 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve if (u < 0x0800) { *cursor++ = 0xc0 | ((uchar) (u >> 6)); } else { - // is it one of the Unicode non-characters? - if (QChar::isNonCharacter(u)) { - *cursor++ = replacement; - ++ch; - ++invalid; - continue; - } - if (QChar::requiresSurrogates(u)) { *cursor++ = 0xf0 | ((uchar) (u >> 18)); *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f); @@ -180,15 +172,14 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte --need; if (!need) { // utf-8 bom composes into 0xfeff code point - bool nonCharacter; if (!headerdone && uc == 0xfeff) { // don't do anything, just skip the BOM - } else if (!(nonCharacter = QChar::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) { + } else if (QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) { // surrogate pair Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length()); *qch++ = QChar::highSurrogate(uc); *qch++ = QChar::lowSurrogate(uc); - } else if ((uc < min_uc) || QChar::isSurrogate(uc) || nonCharacter || uc > QChar::LastValidCodePoint) { + } else if ((uc < min_uc) || QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) { // error: overlong sequence, UTF16 surrogate or non-character *qch++ = replacement; ++invalid; diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp index 7e77b9c251..ba1a77744c 100644 --- a/src/corelib/io/qurlrecode.cpp +++ b/src/corelib/io/qurlrecode.cpp @@ -304,7 +304,7 @@ static bool encodedUtf8ToUtf16(QString &result, ushort *&output, const ushort *b // we've decoded something; safety-check it if (uc < min_uc) return false; - if (QChar::isSurrogate(uc) || QChar::isNonCharacter(uc) || uc > QChar::LastValidCodePoint) + if (QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) return false; if (!QChar::requiresSurrogates(uc)) { diff --git a/src/corelib/json/qjsonparser.cpp b/src/corelib/json/qjsonparser.cpp index 8721f06064..516c53775c 100644 --- a/src/corelib/json/qjsonparser.cpp +++ b/src/corelib/json/qjsonparser.cpp @@ -853,7 +853,7 @@ static inline bool scanUtf8Char(const char *&json, const char *end, uint *result uc = (uc << 6) | (ch & 0x3f); } - if (uc < min_uc || QChar::isNonCharacter(uc) || + if (uc < min_uc || QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) { return false; } diff --git a/src/corelib/json/qjsonwriter.cpp b/src/corelib/json/qjsonwriter.cpp index 8426b351f6..86cca4bb26 100644 --- a/src/corelib/json/qjsonwriter.cpp +++ b/src/corelib/json/qjsonwriter.cpp @@ -138,13 +138,6 @@ static QByteArray escapedString(const QString &s) if (u < 0x0800) { *cursor++ = 0xc0 | ((uchar) (u >> 6)); } else { - // is it one of the Unicode non-characters? - if (QChar::isNonCharacter(u)) { - *cursor++ = replacement; - ++ch; - continue; - } - if (QChar::requiresSurrogates(u)) { *cursor++ = 0xf0 | ((uchar) (u >> 18)); *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f); |