summaryrefslogtreecommitdiffstats
path: root/src/corelib/codecs/qutfcodec.cpp
diff options
context:
space:
mode:
authorKurt Pattyn <pattyn.kurt@gmail.com>2013-10-06 11:40:47 +0200
committerThe Qt Project <gerrit-noreply@qt-project.org>2013-10-17 09:50:58 +0200
commitadd2bf739ae96603cb919b908cbb53c00d0628cc (patch)
tree9702a95d145fc9f429aa6f2ec104cfab75cae753 /src/corelib/codecs/qutfcodec.cpp
parente8853506bf82e569009e68a23437d6a134176f63 (diff)
Allow non-character codes in utf8 strings
Changed the processing of non-character code handling in the UTF8 codec. Non-character codes are now accepted in QStrings, QUrls and QJson strings. Unit tests were adapted accordingly. For more info about non-character codes, see: http://www.unicode.org/versions/corrigendum9.html [ChangeLog][QtCore][QUtf8] UTF-8 now accepts non-character unicode points; these are not replaced by the replacement character anymore [ChangeLog][QtCore][QUrl] QUrl now fully accepts non-character unicode points; they are encoded as percent characters; they can also be pretty decoded [ChangeLog][QtCore][QJson] The Writer and the Parser now fully accept non-character unicode points. Change-Id: I77cf4f0e6210741eac8082912a0b6118eced4f77 Task-number: QTBUG-33229 Reviewed-by: Lars Knoll <lars.knoll@digia.com> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/codecs/qutfcodec.cpp')
-rw-r--r--src/corelib/codecs/qutfcodec.cpp13
1 files changed, 2 insertions, 11 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index aeedcf1aa1..e425f8634c 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -106,14 +106,6 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve
if (u < 0x0800) {
*cursor++ = 0xc0 | ((uchar) (u >> 6));
} else {
- // is it one of the Unicode non-characters?
- if (QChar::isNonCharacter(u)) {
- *cursor++ = replacement;
- ++ch;
- ++invalid;
- continue;
- }
-
if (QChar::requiresSurrogates(u)) {
*cursor++ = 0xf0 | ((uchar) (u >> 18));
*cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
@@ -180,15 +172,14 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte
--need;
if (!need) {
// utf-8 bom composes into 0xfeff code point
- bool nonCharacter;
if (!headerdone && uc == 0xfeff) {
// don't do anything, just skip the BOM
- } else if (!(nonCharacter = QChar::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) {
+ } else if (QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) {
// surrogate pair
Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
*qch++ = QChar::highSurrogate(uc);
*qch++ = QChar::lowSurrogate(uc);
- } else if ((uc < min_uc) || QChar::isSurrogate(uc) || nonCharacter || uc > QChar::LastValidCodePoint) {
+ } else if ((uc < min_uc) || QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) {
// error: overlong sequence, UTF16 surrogate or non-character
*qch++ = replacement;
++invalid;