summaryrefslogtreecommitdiffstats
path: root/src/corelib/codecs
diff options
context:
space:
mode:
authorQt Continuous Integration System <qt-info@nokia.com>2010-03-10 07:53:20 +0100
committerQt Continuous Integration System <qt-info@nokia.com>2010-03-10 07:53:20 +0100
commit12d63b05a942281e688d857d17c190564c77b698 (patch)
treea42f444e588074d484875bfbbaf186ff7934bf3b /src/corelib/codecs
parent4e8b2039ecbbda87b5c23cc0a574b7accfc2ad33 (diff)
parentcbda8ea64d8492d7c8f125cc1b43396b91ecdd8e (diff)
Merge branch '4.7-cutoff' of scm.dev.nokia.troll.no:qt/oslo-staging-1 into 4.7-integration
* '4.7-cutoff' of scm.dev.nokia.troll.no:qt/oslo-staging-1: (83 commits) Don't wait forever when scanning for wifi networks on win32 fails. Revert 12b6987031be9faee3886d7623888feb4e1762af Changed TEST_COMPILER from CC to CXX in configure script. doc: Fixed several qdoc errors. Carbon: Native filedialog does not apply filters on app-bundles Cocoa: Native filedialog does not apply filters on app-bundles Improve performance of QTimer::singleShot Add a benchmark comparing single shot timer with invokeMethod use Qt's private mac functions, reduce code redundancy Assistant: Fix compile warning for empty header. Doc: mark QEasingCurve support functions as new in 4.7. Modify the XML test suite not to use non-characters. Autotest: Fix failing QTextCodec tests Doc: mark methods as internal (as they were in previous releases) Doc: add image for Qt Quick to "What's New" page. qdoc: Clear a static multimap after each qdocconf file. qdoc: Added some debug output to track down a crash Fixed mouse wheel handling in scrollareas. Added two missing keys for X11 Carbon: usage of menu bars can cause exceptions to be thrown ...
Diffstat (limited to 'src/corelib/codecs')
-rw-r--r--src/corelib/codecs/qutfcodec.cpp39
1 files changed, 27 insertions, 12 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index 7655c510ae..f747bf754f 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -48,6 +48,19 @@ QT_BEGIN_NAMESPACE
enum { Endian = 0, Data = 1 };
+static inline bool isUnicodeNonCharacter(uint ucs4)
+{
+ // Unicode has a couple of "non-characters" that one can use internally,
+ // but are not allowed to be used for text interchange.
+ //
+ // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
+ // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
+ // U+FDEF (inclusive)
+
+ return (ucs4 & 0xfffe) == 0xfffe
+ || (ucs4 - 0xfdd0U) < 16;
+}
+
QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state)
{
uchar replacement = '?';
@@ -106,16 +119,17 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve
if (u < 0x0800) {
*cursor++ = 0xc0 | ((uchar) (u >> 6));
} else {
+ // is it one of the Unicode non-characters?
+ if (isUnicodeNonCharacter(u)) {
+ *cursor++ = replacement;
+ ++ch;
+ ++invalid;
+ continue;
+ }
+
if (u > 0xffff) {
- // see QString::fromUtf8() and QString::utf8() for explanations
- if (u > 0x10fe00 && u < 0x10ff00) {
- *cursor++ = (u - 0x10fe00);
- ++ch;
- continue;
- } else {
- *cursor++ = 0xf0 | ((uchar) (u >> 18));
- *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
- }
+ *cursor++ = 0xf0 | ((uchar) (u >> 18));
+ *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
} else {
*cursor++ = 0xe0 | (((uchar) (u >> 12)) & 0x3f);
}
@@ -179,15 +193,16 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte
--need;
if (!need) {
// utf-8 bom composes into 0xfeff code point
+ bool nonCharacter;
if (!headerdone && uc == 0xfeff) {
// dont do anything, just skip the BOM
- } else if (uc > 0xffff && uc < 0x110000) {
+ } else if (!(nonCharacter = isUnicodeNonCharacter(uc)) && uc > 0xffff && uc < 0x110000) {
// surrogate pair
Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
*qch++ = QChar::highSurrogate(uc);
*qch++ = QChar::lowSurrogate(uc);
- } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
- // error: overlong sequence, UTF16 surrogate or BOM
+ } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || nonCharacter || uc >= 0x110000) {
+ // error: overlong sequence, UTF16 surrogate or non-character
*qch++ = replacement;
++invalid;
} else {