From a2ddd96ac8b7657c2ef64f2a8f51db5cd8a8d96a Mon Sep 17 00:00:00 2001 From: Giuseppe D'Angelo Date: Wed, 18 Dec 2019 20:23:11 +0100 Subject: Introduce QString(View)::isValidUtf16 QString(View)s can be built or manipulated in ways that make them contain/refer to improperly encoded UTF-16 data. Problem is, we don't have public APIs to check whether a string contains valid UTF-16. This knowledge is precious if the string is to be fed in algorithms, regular expressions, etc. that expect validated input (e.g. QRegularExpression can be faster if it can assume valid UTF-16, otherwise it has to employ extra checks). Add a function that does the validation. [ChangeLog][QtCore][QStringView] Added QStringView::isValidUtf16. [ChangeLog][QtCore][QString] Added QString::isValidUtf16. Change-Id: Idd699183f6ec08013046c76c6a5a7c524b6c6fbc Reviewed-by: Thiago Macieira --- src/corelib/text/qstring.cpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'src/corelib/text/qstring.cpp') diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index 4d83f19db7..82b2c10a93 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -591,6 +591,20 @@ bool QtPrivate::isLatin1(QStringView s) noexcept return true; } +/* Reports whether the view s can be walked from start to end by a QStringIterator without next() ever yielding the InvalidCodePoint sentinel. Returns false at the first position where the sentinel comes back, and true once hasNext() reports no more input (so a view the loop never enters yields true). */ +bool QtPrivate::isValidUtf16(QStringView s) noexcept +{ + /* Sentinel handed to next() and compared against its result. Presumably this is the value QStringIterator substitutes for input it cannot decode -- confirm against qstringiterator_p.h. */ + Q_CONSTEXPR uint InvalidCodePoint = UINT_MAX; + + QStringIterator i(s); + while (i.hasNext()) { + uint c = i.next(InvalidCodePoint); + if (c == InvalidCodePoint) + return false; + } + + return true; +} + // conversion between Latin 1 and UTF-16 void qt_from_latin1(ushort *dst, const char *str, size_t size) noexcept { @@ -9046,6 +9060,21 @@ bool QString::isRightToLeft() const return QtPrivate::isRightToLeft(QStringView(*this)); } +/*! + \fn bool QString::isValidUtf16() const noexcept + \since 5.15 + + Returns \c true if the string contains valid UTF-16 encoded data, + or \c false otherwise. 
+ + Note that this function does not perform any special validation of the + data; it merely checks if it can be successfully decoded from UTF-16. + The data is assumed to be in host byte order; the presence of a BOM + is meaningless. + + \sa QStringView::isValidUtf16() +*/ + /*! \fn QChar *QString::data() Returns a pointer to the data stored in the QString. The pointer -- cgit v1.2.3