summaryrefslogtreecommitdiffstats
path: root/src/corelib
diff options
context:
space:
mode:
authorGiuseppe D'Angelo <giuseppe.dangelo@kdab.com>2019-12-18 20:23:11 +0100
committerGiuseppe D'Angelo <giuseppe.dangelo@kdab.com>2019-12-20 11:13:22 +0100
commita2ddd96ac8b7657c2ef64f2a8f51db5cd8a8d96a (patch)
treed118809cc7b6203f9e48c717313dd472cb5d7518 /src/corelib
parent556712f511a02ff8101e648d2e6f0090231d4f3d (diff)
Introduce QString(View)::isValidUtf16
QString(View)s can be built or manipulated in ways that make them contain/refer to improperly encoded UTF-16 data. Problem is, we don't have public APIs to check whether a string contains valid UTF-16. This knowledge is precious if the string is to be fed in algorithms, regular expressions, etc. that expect validated input (e.g. QRegularExpression can be faster if it can assume valid UTF-16, otherwise it has to employ extra checks). Add a function that does the validation. [ChangeLog][QtCore][QStringView] Added QStringView::isValidUtf16. [ChangeLog][QtCore][QString] Added QString::isValidUtf16. Change-Id: Idd699183f6ec08013046c76c6a5a7c524b6c6fbc Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib')
-rw-r--r--src/corelib/text/qstring.cpp29
-rw-r--r--src/corelib/text/qstring.h2
-rw-r--r--src/corelib/text/qstringalgorithms.h1
-rw-r--r--src/corelib/text/qstringview.cpp15
-rw-r--r--src/corelib/text/qstringview.h2
5 files changed, 49 insertions, 0 deletions
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp
index 4d83f19db7..82b2c10a93 100644
--- a/src/corelib/text/qstring.cpp
+++ b/src/corelib/text/qstring.cpp
@@ -591,6 +591,20 @@ bool QtPrivate::isLatin1(QStringView s) noexcept
return true;
}
+bool QtPrivate::isValidUtf16(QStringView s) noexcept
+{
+ Q_CONSTEXPR uint InvalidCodePoint = UINT_MAX;
+
+ QStringIterator i(s);
+ while (i.hasNext()) {
+ uint c = i.next(InvalidCodePoint);
+ if (c == InvalidCodePoint)
+ return false;
+ }
+
+ return true;
+}
+
// conversion between Latin 1 and UTF-16
void qt_from_latin1(ushort *dst, const char *str, size_t size) noexcept
{
@@ -9046,6 +9060,21 @@ bool QString::isRightToLeft() const
return QtPrivate::isRightToLeft(QStringView(*this));
}
+/*!
+ \fn bool QString::isValidUtf16() const noexcept
+ \since 5.15
+
+ Returns \c true if the string contains valid UTF-16 encoded data,
+ or \c false otherwise.
+
+ Note that this function does not perform any special validation of the
+ data; it merely checks if it can be successfully decoded from UTF-16.
+ The data is assumed to be in host byte order; the presence of a BOM
+ is meaningless.
+
+ \sa QStringView::isValidUtf16()
+*/
+
/*! \fn QChar *QString::data()
Returns a pointer to the data stored in the QString. The pointer
diff --git a/src/corelib/text/qstring.h b/src/corelib/text/qstring.h
index 1669d7c94a..1f4e856660 100644
--- a/src/corelib/text/qstring.h
+++ b/src/corelib/text/qstring.h
@@ -919,6 +919,8 @@ public:
bool isSimpleText() const;
bool isRightToLeft() const;
+ Q_REQUIRED_RESULT bool isValidUtf16() const noexcept
+ { return QStringView(*this).isValidUtf16(); }
QString(int size, Qt::Initialization);
Q_DECL_CONSTEXPR inline QString(QStringDataPtr dd) : d(dd.ptr) {}
diff --git a/src/corelib/text/qstringalgorithms.h b/src/corelib/text/qstringalgorithms.h
index d54e376aa9..0b7774b4f3 100644
--- a/src/corelib/text/qstringalgorithms.h
+++ b/src/corelib/text/qstringalgorithms.h
@@ -99,6 +99,7 @@ Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isAscii(QLatin1String
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isAscii(QStringView s) noexcept;
Q_REQUIRED_RESULT Q_DECL_CONSTEXPR inline bool isLatin1(QLatin1String s) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isLatin1(QStringView s) noexcept;
+Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isValidUtf16(QStringView s) noexcept;
} // namespace QtPRivate
diff --git a/src/corelib/text/qstringview.cpp b/src/corelib/text/qstringview.cpp
index 75de827583..c4ddb06ea4 100644
--- a/src/corelib/text/qstringview.cpp
+++ b/src/corelib/text/qstringview.cpp
@@ -865,6 +865,21 @@ QT_BEGIN_NAMESPACE
*/
/*!
+ \fn bool QStringView::isValidUtf16() const
+ \since 5.15
+
+ Returns \c true if the string contains valid UTF-16 encoded data,
+ or \c false otherwise.
+
+ Note that this function does not perform any special validation of the
+ data; it merely checks if it can be successfully decoded from UTF-16.
+ The data is assumed to be in host byte order; the presence of a BOM
+ is meaningless.
+
+ \sa QString::isValidUtf16()
+*/
+
+/*!
\fn QStringView::toWCharArray(wchar_t *array) const
\since 5.14
diff --git a/src/corelib/text/qstringview.h b/src/corelib/text/qstringview.h
index 4ab4d2570f..06391ffef4 100644
--- a/src/corelib/text/qstringview.h
+++ b/src/corelib/text/qstringview.h
@@ -294,6 +294,8 @@ public:
Q_REQUIRED_RESULT bool isRightToLeft() const noexcept
{ return QtPrivate::isRightToLeft(*this); }
+ Q_REQUIRED_RESULT bool isValidUtf16() const noexcept
+ { return QtPrivate::isValidUtf16(*this); }
Q_REQUIRED_RESULT inline int toWCharArray(wchar_t *array) const; // defined in qstring.h