diff options
author | Sona Kurazyan <sona.kurazyan@qt.io> | 2022-06-17 14:43:17 +0200 |
---|---|---|
committer | Marc Mutz <marc.mutz@qt.io> | 2022-06-20 21:29:04 +0000 |
commit | da0d7f61c851431d14430684c62345bc23dbf001 (patch) | |
tree | 3e7bf228e48edc2ef9972806b46c83be78a585fa /src/corelib/serialization/qxmlutils.cpp | |
parent | 3d73aa660b5e1af5758ae7207ce1c05d7a0458e1 (diff) |
QDom: Stop treating non-BMP characters as invalid
According to https://www.w3.org/TR/REC-xml/#NT-Char, Unicode characters
within the range [#x10000-#x10FFFF] are considered valid, so
fix the check for valid characters accordingly. This requires changing
the loop over the input QString to iterate over code points (instead of
code units).
Fixes: QTBUG-104362
Pick-to: 6.4 6.3 6.2 5.15
Change-Id: I7dcf5cad05265a54882807a50522d28b647e06ee
Reviewed-by: Marc Mutz <marc.mutz@qt.io>
Diffstat (limited to 'src/corelib/serialization/qxmlutils.cpp')
-rw-r--r-- | src/corelib/serialization/qxmlutils.cpp | 15 |
1 files changed, 9 insertions, 6 deletions
```diff
diff --git a/src/corelib/serialization/qxmlutils.cpp b/src/corelib/serialization/qxmlutils.cpp
index 74a0cf0c23..778e8de72d 100644
--- a/src/corelib/serialization/qxmlutils.cpp
+++ b/src/corelib/serialization/qxmlutils.cpp
@@ -235,13 +235,16 @@ bool QXmlUtils::isLetter(const QChar c)
    \sa {http://www.w3.org/TR/REC-xml/#NT-Char},
        {Extensible Markup Language (XML) 1.0 (Fourth Edition), [2] Char}
  */
-bool QXmlUtils::isChar(const QChar c)
+bool QXmlUtils::isChar(const char32_t c)
 {
-    return (c.unicode() >= 0x0020 && c.unicode() <= 0xD7FF)
-           || c.unicode() == 0x0009
-           || c.unicode() == 0x000A
-           || c.unicode() == 0x000D
-           || (c.unicode() >= 0xE000 && c.unicode() <= 0xFFFD);
+    // The valid range is defined by https://www.w3.org/TR/REC-xml/#NT-Char as following:
+    // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+    return (c >= 0x0020 && c <= 0xD7FF)
+           || c == 0x0009
+           || c == 0x000A
+           || c == 0x000D
+           || (c >= 0xE000 && c <= 0xFFFD)
+           || (c >= 0x10000 && c <= 0x10FFFF);
 }
 
 /*!
```